
HDFS-5797. Implement offline image viewer. Contributed by Haohui Mai.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-5698@1561808 13f79535-47bb-0310-9956-ffa450edef68
Jing Zhao 11 years ago
parent
commit
e9a5f67b51

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES_HDFS-5698.txt

@@ -27,3 +27,5 @@ HDFS-5698 subtasks
 
     HDFS-5826. Update the stored edit logs to be consistent with the changes in 
     HDFS-5698 branch. (Haohui Mai via jing9)
+
+    HDFS-5797. Implement offline image viewer. (Haohui Mai via jing9)

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml

@@ -8,6 +8,9 @@
      <Match>
        <Package name="org.apache.hadoop.hdfs.server.namenode.ha.proto" />
      </Match>
+     <Match>
+       <Class name="~org.apache.hadoop.hdfs.server.namenode.FsImageProto.*" />
+     </Match>
      <Match>
        <Package name="org.apache.hadoop.hdfs.qjournal.protocol" />
      </Match>

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs

@@ -139,7 +139,7 @@ elif [ "$COMMAND" = "balancer" ] ; then
 elif [ "$COMMAND" = "jmxget" ] ; then
   CLASS=org.apache.hadoop.hdfs.tools.JMXGet
 elif [ "$COMMAND" = "oiv" ] ; then
-  CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer
+  CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewerPB
 elif [ "$COMMAND" = "oev" ] ; then
   CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer
 elif [ "$COMMAND" = "fetchdt" ] ; then

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java

@@ -208,12 +208,12 @@ public class FSImageFormat {
     public void load(File file) throws IOException {
       Preconditions.checkState(impl == null, "Image already loaded!");
 
-      byte[] magic = new byte[FSImageFormatProtobuf.MAGIC_HEADER.length];
+      byte[] magic = new byte[FSImageUtil.MAGIC_HEADER.length];
       FileInputStream is = null;
       try {
         is = new FileInputStream(file);
         if (is.read(magic) == magic.length
-            && Arrays.equals(magic, FSImageFormatProtobuf.MAGIC_HEADER)) {
+            && Arrays.equals(magic, FSImageUtil.MAGIC_HEADER)) {
           FSImageFormatProtobuf.Loader loader = new FSImageFormatProtobuf.Loader(
               conf, fsn);
           impl = loader;

+ 9 - 60
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java

@@ -20,7 +20,6 @@ package org.apache.hadoop.hdfs.server.namenode;
 
 import java.io.BufferedInputStream;
 import java.io.BufferedOutputStream;
-import java.io.ByteArrayInputStream;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
@@ -33,7 +32,6 @@ import java.nio.channels.FileChannel;
 import java.security.DigestOutputStream;
 import java.security.MessageDigest;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.Map;
@@ -45,7 +43,6 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.protocol.LayoutVersion;
-import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
 import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto;
 import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto;
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
@@ -72,11 +69,8 @@ import com.google.protobuf.CodedOutputStream;
 public final class FSImageFormatProtobuf {
   private static final Log LOG = LogFactory.getLog(FSImageFormatProtobuf.class);
 
-  static final byte[] MAGIC_HEADER = "HDFSIMG1".getBytes();
-  private static final int FILE_VERSION = 1;
-
   public static final class Loader implements FSImageFormat.AbstractLoader {
-    private static final int MINIMUM_FILE_LENGTH = 8;
+    static final int MINIMUM_FILE_LENGTH = 8;
     private final Configuration conf;
     private final FSNamesystem fsn;
 
@@ -118,50 +112,12 @@ public final class FSImageFormatProtobuf {
       }
     }
 
-    private boolean checkFileFormat(RandomAccessFile file) throws IOException {
-      if (file.length() < MINIMUM_FILE_LENGTH)
-        return false;
-
-      byte[] magic = new byte[MAGIC_HEADER.length];
-      file.readFully(magic);
-      if (!Arrays.equals(MAGIC_HEADER, magic))
-        return false;
-
-      return true;
-    }
-
-    private FileSummary loadSummary(RandomAccessFile file) throws IOException {
-      final int FILE_LENGTH_FIELD_SIZE = 4;
-      long fileLength = file.length();
-      file.seek(fileLength - FILE_LENGTH_FIELD_SIZE);
-      int summaryLength = file.readInt();
-      file.seek(fileLength - FILE_LENGTH_FIELD_SIZE - summaryLength);
-
-      byte[] summaryBytes = new byte[summaryLength];
-      file.readFully(summaryBytes);
-
-      FileSummary summary = FileSummary
-          .parseDelimitedFrom(new ByteArrayInputStream(summaryBytes));
-      if (summary.getOndiskVersion() != FILE_VERSION) {
-        throw new IOException("Unsupported file version "
-            + summary.getOndiskVersion());
-      }
-
-      if (!LayoutVersion.supports(Feature.PROTOBUF_FORMAT,
-          summary.getLayoutVersion())) {
-        throw new IOException("Unsupported layout version "
-            + summary.getLayoutVersion());
-      }
-      return summary;
-    }
-
-    @SuppressWarnings("resource")
     private void loadInternal(RandomAccessFile raFile, FileInputStream fin)
         throws IOException {
-      if (!checkFileFormat(raFile)) {
+      if (!FSImageUtil.checkFileFormat(raFile)) {
         throw new IOException("Unrecognized file format");
       }
-      FileSummary summary = loadSummary(raFile);
+      FileSummary summary = FSImageUtil.loadSummary(raFile);
 
       FileChannel channel = fin.getChannel();
 
@@ -192,15 +148,8 @@ public final class FSImageFormatProtobuf {
         InputStream in = new BufferedInputStream(new LimitInputStream(fin,
             s.getLength()));
 
-        if (summary.hasCodec()) {
-          // read compression related info
-          FSImageCompression compression = FSImageCompression
-              .createCompression(conf, summary.getCodec());
-          CompressionCodec imageCodec = compression.getImageCodec();
-          if (summary.getCodec() != null) {
-            in = imageCodec.createInputStream(in);
-          }
-        }
+        in = FSImageUtil.wrapInputStreamForCompression(conf,
+            summary.getCodec(), in);
 
         String n = s.getName();
         switch (SectionName.fromString(n)) {
@@ -292,7 +241,7 @@ public final class FSImageFormatProtobuf {
 
   public static final class Saver {
     private final SaveNamespaceContext context;
-    private long currentOffset = MAGIC_HEADER.length;
+    private long currentOffset = FSImageUtil.MAGIC_HEADER.length;
     private MD5Hash savedDigest;
     private StringMap stringMap = new StringMap();
 
@@ -378,12 +327,12 @@ public final class FSImageFormatProtobuf {
       MessageDigest digester = MD5Hash.getDigester();
       underlyingOutputStream = new DigestOutputStream(new BufferedOutputStream(
           fout), digester);
-      underlyingOutputStream.write(MAGIC_HEADER);
+      underlyingOutputStream.write(FSImageUtil.MAGIC_HEADER);
 
       fileChannel = fout.getChannel();
 
       FileSummary.Builder b = FileSummary.newBuilder()
-          .setOndiskVersion(FILE_VERSION)
+          .setOndiskVersion(FSImageUtil.FILE_VERSION)
           .setLayoutVersion(LayoutVersion.getCurrentLayoutVersion());
 
       codec = compression.getImageCodec();
@@ -531,7 +480,7 @@ public final class FSImageFormatProtobuf {
 
     private static final SectionName[] values = SectionName.values();
 
-    private static SectionName fromString(String name) {
+    public static SectionName fromString(String name) {
       for (SectionName n : values) {
         if (n.name.equals(name))
           return n;

+ 93 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageUtil.java

@@ -0,0 +1,93 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.RandomAccessFile;
+import java.util.Arrays;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.protocol.LayoutVersion;
+import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
+import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.Loader;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;
+import org.apache.hadoop.io.compress.CompressionCodec;
+
+@InterfaceAudience.Private
+public final class FSImageUtil {
+  public static final byte[] MAGIC_HEADER = "HDFSIMG1".getBytes();
+  public static final int FILE_VERSION = 1;
+
+  public static boolean checkFileFormat(RandomAccessFile file)
+      throws IOException {
+    if (file.length() < Loader.MINIMUM_FILE_LENGTH)
+      return false;
+
+    byte[] magic = new byte[MAGIC_HEADER.length];
+    file.readFully(magic);
+    if (!Arrays.equals(MAGIC_HEADER, magic))
+      return false;
+
+    return true;
+  }
+
+  public static FileSummary loadSummary(RandomAccessFile file)
+      throws IOException {
+    final int FILE_LENGTH_FIELD_SIZE = 4;
+    long fileLength = file.length();
+    file.seek(fileLength - FILE_LENGTH_FIELD_SIZE);
+    int summaryLength = file.readInt();
+
+    if (summaryLength <= 0) {
+      throw new IOException("Negative length of the file");
+    }
+    file.seek(fileLength - FILE_LENGTH_FIELD_SIZE - summaryLength);
+
+    byte[] summaryBytes = new byte[summaryLength];
+    file.readFully(summaryBytes);
+
+    FileSummary summary = FileSummary
+        .parseDelimitedFrom(new ByteArrayInputStream(summaryBytes));
+    if (summary.getOndiskVersion() != FILE_VERSION) {
+      throw new IOException("Unsupported file version "
+          + summary.getOndiskVersion());
+    }
+
+    if (!LayoutVersion.supports(Feature.PROTOBUF_FORMAT,
+        summary.getLayoutVersion())) {
+      throw new IOException("Unsupported layout version "
+          + summary.getLayoutVersion());
+    }
+    return summary;
+  }
+
+  public static InputStream wrapInputStreamForCompression(
+      Configuration conf, String codec, InputStream in) throws IOException {
+    if (codec.isEmpty())
+      return in;
+
+    FSImageCompression compression = FSImageCompression.createCompression(
+        conf, codec);
+    CompressionCodec imageCodec = compression.getImageCodec();
+    return imageCodec.createInputStream(in);
+  }
+
+}
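
The helpers above encode the layout of the new protobuf-based fsimage: the file starts with the 8-byte magic header "HDFSIMG1", the last four bytes hold the length of a delimited FileSummary message written immediately before them, and the summary lists each section's name, offset and length, and names the compression codec, if any, applied to the sections. All of the viewers added below read the image the same way; the following is a minimal sketch of that pattern (the class name and the omitted parsing step are placeholders, not part of this patch):

// Sketch only, not part of the patch: how a reader combines the FSImageUtil
// helpers to iterate over the sections of an fsimage.
import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.server.namenode.FSImageUtil;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;

import com.google.common.io.LimitInputStream;

public class FsImageSectionReaderSketch {
  public static void read(String path, Configuration conf) throws IOException {
    RandomAccessFile file = new RandomAccessFile(path, "r");
    if (!FSImageUtil.checkFileFormat(file)) {      // verifies the magic header
      throw new IOException("Unrecognized FSImage");
    }
    FileSummary summary = FSImageUtil.loadSummary(file); // trailer at end of file
    FileInputStream fin = new FileInputStream(file.getFD());
    for (FileSummary.Section s : summary.getSectionsList()) {
      // Each section records its own offset and length in the summary.
      fin.getChannel().position(s.getOffset());
      InputStream in = FSImageUtil.wrapInputStreamForCompression(conf,
          summary.getCodec(), new BufferedInputStream(new LimitInputStream(
              fin, s.getLength())));
      // ... parse the delimited protobuf messages of section s from 'in' ...
    }
  }
}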

+ 160 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionCalculator.java

@@ -0,0 +1,160 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.BufferedInputStream;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintWriter;
+import java.io.RandomAccessFile;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto;
+import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SectionName;
+import org.apache.hadoop.hdfs.server.namenode.FSImageUtil;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection;
+import org.apache.hadoop.io.IOUtils;
+
+import com.google.common.base.Preconditions;
+import com.google.common.io.LimitInputStream;
+
+/**
+ * This is the tool for analyzing file sizes in the namespace image. In order to
+ * run the tool one should define a range of integers <tt>[0, maxSize]</tt> by
+ * specifying <tt>maxSize</tt> and a <tt>step</tt>. The range of integers is
+ * divided into segments of size <tt>step</tt>:
+ * <tt>[0, s<sub>1</sub>, ..., s<sub>n-1</sub>, maxSize]</tt>, and the visitor
+ * calculates how many files in the system fall into each segment
+ * <tt>[s<sub>i-1</sub>, s<sub>i</sub>)</tt>. Note that files larger than
+ * <tt>maxSize</tt> always fall into the very last segment.
+ *
+ * <h3>Input.</h3>
+ * <ul>
+ * <li><tt>filename</tt> specifies the location of the image file;</li>
+ * <li><tt>maxSize</tt> determines the range <tt>[0, maxSize]</tt> of files
+ * sizes considered by the visitor;</li>
+ * <li><tt>step</tt> the range is divided into segments of size step.</li>
+ * </ul>
+ *
+ * <h3>Output.</h3> The output file is formatted as a tab-separated two-column
+ * table: Size and NumFiles, where Size represents the start of the segment
+ * and NumFiles is the number of files from the image whose size falls into
+ * this segment.
+ *
+ */
+final class FileDistributionCalculator {
+  private final static long MAX_SIZE_DEFAULT = 0x2000000000L; // 1/8 TB = 2^37
+  private final static int INTERVAL_DEFAULT = 0x200000; // 2 MB = 2^21
+
+  private final Configuration conf;
+  private final long maxSize;
+  private final int steps;
+  private final PrintWriter out;
+
+  private int[] distribution;
+  private int totalFiles;
+  private int totalDirectories;
+  private int totalBlocks;
+  private long totalSpace;
+  private long maxFileSize;
+
+  FileDistributionCalculator(Configuration conf, long maxSize, int steps,
+      PrintWriter out) {
+    this.conf = conf;
+    this.maxSize = maxSize == 0 ? MAX_SIZE_DEFAULT : maxSize;
+    this.steps = steps == 0 ? INTERVAL_DEFAULT : steps;
+    this.out = out;
+    long numIntervals = this.maxSize / this.steps;
+    this.distribution = new int[1 + (int) (numIntervals)];
+    Preconditions.checkState(numIntervals < Integer.MAX_VALUE,
+        "Too many distribution intervals");
+  }
+
+  void visit(RandomAccessFile file) throws IOException {
+    if (!FSImageUtil.checkFileFormat(file)) {
+      throw new IOException("Unrecognized FSImage");
+    }
+
+    FileSummary summary = FSImageUtil.loadSummary(file);
+    FileInputStream in = null;
+    try {
+      in = new FileInputStream(file.getFD());
+      for (FileSummary.Section s : summary.getSectionsList()) {
+        if (SectionName.fromString(s.getName()) != SectionName.INODE) {
+          continue;
+        }
+
+        in.getChannel().position(s.getOffset());
+        InputStream is = FSImageUtil.wrapInputStreamForCompression(conf,
+            summary.getCodec(), new BufferedInputStream(new LimitInputStream(
+                in, s.getLength())));
+        run(is);
+        output();
+      }
+    } finally {
+      IOUtils.cleanup(null, in);
+    }
+  }
+
+  private void run(InputStream in) throws IOException {
+    INodeSection s = INodeSection.parseDelimitedFrom(in);
+    for (int i = 0; i < s.getNumInodes(); ++i) {
+      INodeSection.INode p = INodeSection.INode.parseDelimitedFrom(in);
+      if (p.getType() == INodeSection.INode.Type.FILE) {
+        ++totalFiles;
+        INodeSection.INodeFile f = p.getFile();
+        totalBlocks += f.getBlocksCount();
+        long fileSize = 0;
+        for (BlockProto b : f.getBlocksList()) {
+          fileSize += b.getNumBytes() * f.getReplication();
+        }
+        maxFileSize = Math.max(fileSize, maxFileSize);
+        totalSpace += fileSize;
+
+        int bucket = fileSize > maxSize ? distribution.length - 1 : (int) Math
+            .ceil(fileSize / steps);
+        ++distribution[bucket];
+
+      } else if (p.getType() == INodeSection.INode.Type.DIRECTORY) {
+        ++totalDirectories;
+      }
+
+      if (i % (1 << 20) == 0) {
+        out.println("Processed " + i + " inodes.");
+      }
+    }
+  }
+
+  private void output() {
+    // write the distribution into the output file
+    out.print("Size\tNumFiles\n");
+    for (int i = 0; i < distribution.length; i++) {
+      if (distribution[i] != 0) {
+        out.print(((long) i * steps) + "\t" + distribution[i]);
+        out.print('\n');
+      }
+    }
+    out.print("totalFiles = " + totalFiles + "\n");
+    out.print("totalDirectories = " + totalDirectories + "\n");
+    out.print("totalBlocks = " + totalBlocks + "\n");
+    out.print("totalSpace = " + totalSpace + "\n");
+    out.print("maxFileSize = " + maxFileSize + "\n");
+  }
+}
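
For reference, the bucket arithmetic in run() works out as follows: with the defaults maxSize = 0x2000000000 (2^37 bytes, 128 GB) and step = 0x200000 (2^21 bytes, 2 MB) there are 2^16 = 65536 intervals plus one extra bucket for files larger than maxSize, and a file falls into bucket fileSize / step (the Math.ceil over an integer division in run() reduces to plain integer division). A standalone worked example, not part of the patch:

// Illustrative only: the bucket arithmetic used by FileDistributionCalculator.
public class BucketMathSketch {
  public static void main(String[] args) {
    long maxSize = 0x2000000000L;  // 2^37 bytes = 128 GB (default)
    int step = 0x200000;           // 2^21 bytes = 2 MB (default)
    long numIntervals = maxSize / step;            // 65536 intervals
    int buckets = 1 + (int) numIntervals;          // plus one catch-all bucket
    long fileSize = 5L * 1024 * 1024;              // a 5 MB file
    int bucket = fileSize > maxSize ? buckets - 1 : (int) (fileSize / step);
    // Prints: 65537 buckets; a 5 MB file lands in bucket 2 (Size column 4194304)
    System.out.println(buckets + " buckets; a 5 MB file lands in bucket "
        + bucket + " (Size column " + ((long) bucket * step) + ")");
  }
}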

+ 233 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/LsrPBImage.java

@@ -0,0 +1,233 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.BufferedInputStream;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintWriter;
+import java.io.RandomAccessFile;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.permission.PermissionStatus;
+import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto;
+import org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode;
+import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SectionName;
+import org.apache.hadoop.hdfs.server.namenode.FSImageUtil;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeDirectorySection;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INode;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeDirectory;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeFile;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeSymlink;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.StringTableSection;
+import org.apache.hadoop.hdfs.server.namenode.INodeId;
+import org.apache.hadoop.io.IOUtils;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.io.LimitInputStream;
+
+/**
+ * LsrPBImage displays the metadata of the namespace in a format similar to
+ * the output of "lsr": one line per inode, listing permissions, replication,
+ * owner, group, modification time, size and full path. It first loads the
+ * string table, the inode section and the directory section of the image
+ * into memory, then walks the directory tree from the root inode and prints
+ * each entry it reaches.
+ *
+ */
+final class LsrPBImage {
+  private final Configuration conf;
+  private final PrintWriter out;
+  private String[] stringTable;
+  private HashMap<Long, INodeSection.INode> inodes = Maps.newHashMap();
+  private HashMap<Long, long[]> dirmap = Maps.newHashMap();
+
+  public LsrPBImage(Configuration conf, PrintWriter out) {
+    this.conf = conf;
+    this.out = out;
+  }
+
+  public void visit(RandomAccessFile file) throws IOException {
+    if (!FSImageUtil.checkFileFormat(file)) {
+      throw new IOException("Unrecognized FSImage");
+    }
+
+    FileSummary summary = FSImageUtil.loadSummary(file);
+    FileInputStream fin = null;
+    try {
+      fin = new FileInputStream(file.getFD());
+
+      ArrayList<FileSummary.Section> sections = Lists.newArrayList(summary
+          .getSectionsList());
+      Collections.sort(sections, new Comparator<FileSummary.Section>() {
+        @Override
+        public int compare(FileSummary.Section s1, FileSummary.Section s2) {
+          SectionName n1 = SectionName.fromString(s1.getName());
+          SectionName n2 = SectionName.fromString(s2.getName());
+          if (n1 == null) {
+            return n2 == null ? 0 : -1;
+          } else if (n2 == null) {
+            return -1;
+          } else {
+            return n1.ordinal() - n2.ordinal();
+          }
+        }
+      });
+
+      for (FileSummary.Section s : sections) {
+        fin.getChannel().position(s.getOffset());
+        InputStream is = FSImageUtil.wrapInputStreamForCompression(conf,
+            summary.getCodec(), new BufferedInputStream(new LimitInputStream(
+                fin, s.getLength())));
+
+        switch (SectionName.fromString(s.getName())) {
+        case STRING_TABLE:
+          loadStringTable(is);
+          break;
+        case INODE:
+          loadINodeSection(is);
+          break;
+        case INODE_DIR:
+          loadINodeDirectorySection(is);
+          break;
+        default:
+          break;
+        }
+      }
+      list("", INodeId.ROOT_INODE_ID);
+    } finally {
+      IOUtils.cleanup(null, fin);
+    }
+  }
+
+  private void list(String parent, long dirId) {
+    INode inode = inodes.get(dirId);
+    listINode(parent.isEmpty() ? "/" : parent, inode);
+    long[] children = dirmap.get(dirId);
+    if (children == null) {
+      return;
+    }
+    String newParent = parent + inode.getName().toStringUtf8() + "/";
+    for (long cid : children) {
+      list(newParent, cid);
+    }
+  }
+
+  private void listINode(String parent, INode inode) {
+    switch (inode.getType()) {
+    case FILE: {
+      INodeFile f = inode.getFile();
+      PermissionStatus p = FSImageFormatPBINode.Loader.loadPermission(
+          f.getPermission(), stringTable);
+      out.print(String.format("-%s %2s %8s %10s %10s %10d %s%s\n", p
+          .getPermission().toString(), f.getReplication(), p.getUserName(), p
+          .getGroupName(), f.getModificationTime(), getFileSize(f), parent,
+          inode.getName().toStringUtf8()));
+    }
+      break;
+    case DIRECTORY: {
+      INodeDirectory d = inode.getDirectory();
+      PermissionStatus p = FSImageFormatPBINode.Loader.loadPermission(
+          d.getPermission(), stringTable);
+      out.print(String.format("d%s  - %8s %10s %10s %10d %s%s\n", p
+          .getPermission().toString(), p.getUserName(), p.getGroupName(), d
+          .getModificationTime(), 0, parent, inode.getName().toStringUtf8()));
+    }
+      break;
+    case SYMLINK: {
+      INodeSymlink d = inode.getSymlink();
+      PermissionStatus p = FSImageFormatPBINode.Loader.loadPermission(
+          d.getPermission(), stringTable);
+      out.print(String.format("-%s  - %8s %10s %10s %10d %s%s -> %s\n", p
+          .getPermission().toString(), p.getUserName(), p.getGroupName(), 0, 0,
+          parent, inode.getName().toStringUtf8(), d.getTarget().toStringUtf8()));
+    }
+      break;
+    default:
+      break;
+    }
+  }
+
+  private long getFileSize(INodeFile f) {
+    long size = 0;
+    for (BlockProto p : f.getBlocksList()) {
+      size += p.getNumBytes();
+    }
+    return size;
+  }
+
+  private void loadINodeDirectorySection(InputStream in) throws IOException {
+    while (true) {
+      INodeDirectorySection.DirEntry e = INodeDirectorySection.DirEntry
+          .parseDelimitedFrom(in);
+      // note that in is a LimitedInputStream
+      if (e == null) {
+        break;
+      }
+      long[] l = new long[e.getChildrenCount()];
+      for (int i = 0; i < l.length; ++i) {
+        l[i] = e.getChildren(i);
+      }
+      dirmap.put(e.getParent(), l);
+      for (int i = 0; i < e.getNumOfRef(); i++) {
+        INodeSection.INodeReference.parseDelimitedFrom(in);
+      }
+    }
+  }
+
+  private void loadINodeSection(InputStream in) throws IOException {
+    INodeSection s = INodeSection.parseDelimitedFrom(in);
+    for (int i = 0; i < s.getNumInodes(); ++i) {
+      INodeSection.INode p = INodeSection.INode.parseDelimitedFrom(in);
+      inodes.put(p.getId(), p);
+    }
+  }
+
+  private void loadStringTable(InputStream in) throws IOException {
+    StringTableSection s = StringTableSection.parseDelimitedFrom(in);
+    stringTable = new String[s.getNumEntry() + 1];
+    for (int i = 0; i < s.getNumEntry(); ++i) {
+      StringTableSection.Entry e = StringTableSection.Entry
+          .parseDelimitedFrom(in);
+      stringTable[e.getId()] = e.getStr();
+    }
+  }
+}
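
LsrPBImage builds two in-memory maps before printing anything: inodes (inode id to INode message) and dirmap (parent inode id to the ids of its children), and then walks the namespace depth-first from INodeId.ROOT_INODE_ID in list(). The shape of that recursion can be illustrated with a standalone toy version of the walk; the ids and names below are made up, whereas the real code resolves them from the loaded image sections:

// Illustrative only: the dirmap-driven depth-first walk used by list().
import java.util.HashMap;
import java.util.Map;

public class DirMapWalkSketch {
  static Map<Long, long[]> dirmap = new HashMap<Long, long[]>();
  static Map<Long, String> names = new HashMap<Long, String>();

  public static void main(String[] args) {
    long root = 1L;                      // stand-in for INodeId.ROOT_INODE_ID
    names.put(root, "");                 // the root inode has an empty name
    names.put(2L, "dir0");
    names.put(3L, "file0");
    dirmap.put(root, new long[] { 2L }); // "/" contains dir0
    dirmap.put(2L, new long[] { 3L });   // dir0 contains file0
    list("", root);                      // prints "/", "/dir0", "/dir0/file0"
  }

  static void list(String parent, long id) {
    System.out.println((parent.isEmpty() ? "/" : parent) + names.get(id));
    long[] children = dirmap.get(id);
    if (children == null) {
      return;                            // files and empty dirs stop the recursion
    }
    String newParent = parent + names.get(id) + "/";
    for (long child : children) {
      list(newParent, child);
    }
  }
}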

+ 178 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java

@@ -0,0 +1,178 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.EOFException;
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.RandomAccessFile;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.cli.PosixParser;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * OfflineImageViewerPB dumps the contents of a Hadoop image file to XML or the
+ * console. Main entry point into the utility, either via the command line or
+ * programmatically.
+ */
+@InterfaceAudience.Private
+public class OfflineImageViewerPB {
+  public static final Log LOG = LogFactory.getLog(OfflineImageViewerPB.class);
+
+  private final static String usage = "Usage: bin/hdfs oiv [OPTIONS] -i INPUTFILE -o OUTPUTFILE\n"
+      + "Offline Image Viewer\n"
+      + "View a Hadoop fsimage INPUTFILE using the specified PROCESSOR,\n"
+      + "saving the results in OUTPUTFILE.\n"
+      + "\n"
+      + "The oiv utility will attempt to parse correctly formed image files\n"
+      + "and will abort fail with mal-formed image files.\n"
+      + "\n"
+      + "The tool works offline and does not require a running cluster in\n"
+      + "order to process an image file.\n"
+      + "\n"
+      + "The following image processors are available:\n"
+      + "  * Ls: The default image processor generates an lsr-style listing\n"
+      + "    of the files in the namespace, with the same fields in the same\n"
+      + "    order.  Note that in order to correctly determine file sizes,\n"
+      + "    this formatter cannot skip blocks and will override the\n"
+      + "    -skipBlocks option.\n"
+      + "  * XML: This processor creates an XML document with all elements of\n"
+      + "    the fsimage enumerated, suitable for further analysis by XML\n"
+      + "    tools.\n"
+      + "  * FileDistribution: This processor analyzes the file size\n"
+      + "    distribution in the image.\n"
+      + "    -maxSize specifies the range [0, maxSize] of file sizes to be\n"
+      + "     analyzed (128GB by default).\n"
+      + "    -step defines the granularity of the distribution. (2MB by default)\n"
+      + "\n"
+      + "Required command line arguments:\n"
+      + "-i,--inputFile <arg>   FSImage file to process.\n"
+      + "-o,--outputFile <arg>  Name of output file. If the specified\n"
+      + "                       file exists, it will be overwritten.\n"
+      + "\n"
+      + "Optional command line arguments:\n"
+      + "-p,--processor <arg>   Select which type of processor to apply\n"
+      + "                       against image file."
+      + " (Ls|XML|FileDistribution).\n"
+      + "-h,--help              Display usage information and exit\n";
+
+  /**
+   * Build command-line options and descriptions
+   */
+  private static Options buildOptions() {
+    Options options = new Options();
+
+    // Build in/output file arguments, which are required, but there is no
+    // addOption method that can specify this
+    OptionBuilder.isRequired();
+    OptionBuilder.hasArgs();
+    OptionBuilder.withLongOpt("outputFile");
+    options.addOption(OptionBuilder.create("o"));
+
+    OptionBuilder.isRequired();
+    OptionBuilder.hasArgs();
+    OptionBuilder.withLongOpt("inputFile");
+    options.addOption(OptionBuilder.create("i"));
+
+    options.addOption("p", "processor", true, "");
+    options.addOption("h", "help", false, "");
+    options.addOption("skipBlocks", false, "");
+    options.addOption("printToScreen", false, "");
+    options.addOption("delimiter", true, "");
+
+    return options;
+  }
+
+  /**
+   * Entry point to command-line-driven operation. The user may specify options
+   * and start the fsimage viewer from the command line. The program will process
+   * the image file and exit cleanly or, if an error is encountered, inform the
+   * user and exit.
+   * 
+   * @param args
+   *          Command line options
+   * @throws IOException
+   */
+  public static void main(String[] args) throws IOException {
+    Options options = buildOptions();
+    if (args.length == 0) {
+      printUsage();
+      return;
+    }
+
+    CommandLineParser parser = new PosixParser();
+    CommandLine cmd;
+
+    try {
+      cmd = parser.parse(options, args);
+    } catch (ParseException e) {
+      System.out.println("Error parsing command-line options: ");
+      printUsage();
+      return;
+    }
+
+    if (cmd.hasOption("h")) { // print help and exit
+      printUsage();
+      return;
+    }
+
+    String inputFile = cmd.getOptionValue("i");
+    String processor = cmd.getOptionValue("p", "Ls");
+    String outputFile = cmd.getOptionValue("o");
+
+    PrintWriter out = (outputFile == null || outputFile.equals("-")) ? new PrintWriter(
+        System.out) : new PrintWriter(new File(outputFile));
+
+    Configuration conf = new Configuration();
+    try {
+      if (processor.equals("FileDistribution")) {
+        long maxSize = Long.parseLong(cmd.getOptionValue("maxSize", "0"));
+        int step = Integer.parseInt(cmd.getOptionValue("step", "0"));
+        new FileDistributionCalculator(conf, maxSize, step, out)
+            .visit(new RandomAccessFile(inputFile, "r"));
+      } else if (processor.equals("XML")) {
+        new PBImageXmlWriter(conf, out).visit(new RandomAccessFile(inputFile,
+            "r"));
+      } else {
+        new LsrPBImage(conf, out).visit(new RandomAccessFile(inputFile, "r"));
+      }
+    } catch (EOFException e) {
+      System.err.println("Input file ended unexpectedly. Exiting");
+    } catch (IOException e) {
+      System.err.println("Encountered exception.  Exiting: " + e.getMessage());
+    } finally {
+      out.close();
+    }
+
+  }
+
+  /**
+   * Print application usage instructions.
+   */
+  private static void printUsage() {
+    System.out.println(usage);
+  }
+}
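
Besides the command line (for example "hdfs oiv -p XML -i INPUTFILE -o OUTPUTFILE", per the usage text above), the processors can also be driven programmatically, which is how the updated tests below use them. A minimal sketch for the XML processor, mirroring what main() does; the input and output paths are placeholders:

// Sketch only, not part of the patch: programmatic use of the XML processor.
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.RandomAccessFile;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.tools.offlineImageViewer.PBImageXmlWriter;

public class OivXmlSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    PrintWriter out = new PrintWriter(new File("/tmp/fsimage.xml"));   // placeholder
    try {
      new PBImageXmlWriter(conf, out).visit(
          new RandomAccessFile("/tmp/fsimage_0000000000000000001", "r")); // placeholder
    } finally {
      out.close();
    }
  }
}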

+ 415 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageXmlWriter.java

@@ -0,0 +1,415 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.BufferedInputStream;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintWriter;
+import java.io.RandomAccessFile;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoExpirationProto;
+import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto;
+import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto;
+import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto;
+import org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode;
+import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SectionName;
+import org.apache.hadoop.hdfs.server.namenode.FSImageUtil;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.CacheManagerSection;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FilesUnderConstructionSection.FileUnderConstructionEntry;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeDirectorySection;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeDirectory;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeSymlink;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.NameSystemSection;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SnapshotDiffSection;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SnapshotSection;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.StringTableSection;
+import org.apache.hadoop.io.IOUtils;
+
+import com.google.common.collect.Lists;
+import com.google.common.io.LimitInputStream;
+
+/**
+ * PBImageXmlWriter walks over an fsimage structure and writes out an
+ * equivalent XML document that contains the fsimage's components: the name
+ * system information, the string table, the inode and directory sections,
+ * files under construction, snapshots and snapshot diffs, the secret manager
+ * state and the cache manager state. The output is suitable for further
+ * analysis by XML tools.
+ *
+ */
+@InterfaceAudience.Private
+public final class PBImageXmlWriter {
+  private final Configuration conf;
+  private final PrintWriter out;
+  private String[] stringTable;
+
+  public PBImageXmlWriter(Configuration conf, PrintWriter out) {
+    this.conf = conf;
+    this.out = out;
+  }
+
+  public void visit(RandomAccessFile file) throws IOException {
+    if (!FSImageUtil.checkFileFormat(file)) {
+      throw new IOException("Unrecognized FSImage");
+    }
+
+    FileSummary summary = FSImageUtil.loadSummary(file);
+    FileInputStream fin = null;
+    try {
+      fin = new FileInputStream(file.getFD());
+      out.print("<?xml version=\"1.0\"?>\n");
+
+      ArrayList<FileSummary.Section> sections = Lists.newArrayList(summary
+          .getSectionsList());
+      Collections.sort(sections, new Comparator<FileSummary.Section>() {
+        @Override
+        public int compare(FileSummary.Section s1, FileSummary.Section s2) {
+          SectionName n1 = SectionName.fromString(s1.getName());
+          SectionName n2 = SectionName.fromString(s2.getName());
+          if (n1 == null) {
+            return n2 == null ? 0 : -1;
+          } else if (n2 == null) {
+            return -1;
+          } else {
+            return n1.ordinal() - n2.ordinal();
+          }
+        }
+      });
+
+      for (FileSummary.Section s : sections) {
+        fin.getChannel().position(s.getOffset());
+        InputStream is = FSImageUtil.wrapInputStreamForCompression(conf,
+            summary.getCodec(), new BufferedInputStream(new LimitInputStream(
+                fin, s.getLength())));
+
+        switch (SectionName.fromString(s.getName())) {
+        case NS_INFO:
+          dumpNameSection(is);
+          break;
+        case STRING_TABLE:
+          loadStringTable(is);
+          break;
+        case INODE:
+          dumpINodeSection(is);
+          break;
+        case INODE_DIR:
+          dumpINodeDirectorySection(is);
+          break;
+        case FILES_UNDERCONSTRUCTION:
+          dumpFileUnderConstructionSection(is);
+          break;
+        case SNAPSHOT:
+          dumpSnapshotSection(is);
+          break;
+        case SNAPSHOT_DIFF:
+          dumpSnapshotDiffSection(is);
+          break;
+        case SECRET_MANAGER:
+          dumpSecretManagerSection(is);
+          break;
+        case CACHE_MANAGER:
+          dumpCacheManagerSection(is);
+          break;
+        default:
+          break;
+        }
+      }
+    } finally {
+      IOUtils.cleanup(null, fin);
+    }
+  }
+
+  private void dumpCacheManagerSection(InputStream is) throws IOException {
+    out.print("<CacheManagerSection>");
+    CacheManagerSection s = CacheManagerSection.parseDelimitedFrom(is);
+    o("nextDirectiveId", s.getNextDirectiveId());
+    for (int i = 0; i < s.getNumPools(); ++i) {
+      CachePoolInfoProto p = CachePoolInfoProto.parseDelimitedFrom(is);
+      out.print("<pool>");
+      o("poolName", p.getPoolName()).o("ownerName", p.getOwnerName())
+          .o("groupName", p.getGroupName()).o("mode", p.getMode())
+          .o("limit", p.getLimit())
+          .o("maxRelativeExpiry", p.getMaxRelativeExpiry());
+      out.print("</pool>\n");
+    }
+    for (int i = 0; i < s.getNumPools(); ++i) {
+      CacheDirectiveInfoProto p = CacheDirectiveInfoProto
+          .parseDelimitedFrom(is);
+      out.print("<directive>");
+      o("id", p.getId()).o("path", p.getPath())
+          .o("replication", p.getReplication()).o("pool", p.getPool());
+      out.print("<expiration>");
+      CacheDirectiveInfoExpirationProto e = p.getExpiration();
+      o("millis", e.getMillis()).o("relatilve", e.getIsRelative());
+      out.print("</expiration>\n");
+      out.print("</directive>\n");
+    }
+    out.print("</CacheManagerSection>\n");
+
+  }
+
+  private void dumpFileUnderConstructionSection(InputStream in)
+      throws IOException {
+    out.print("<FileUnderConstructionSection>");
+    while (true) {
+      FileUnderConstructionEntry e = FileUnderConstructionEntry
+          .parseDelimitedFrom(in);
+      if (e == null) {
+        break;
+      }
+      out.print("<inode>");
+      o("id", e.getInodeId()).o("path", e.getFullPath());
+      out.print("</inode>\n");
+    }
+    out.print("</FileUnderConstructionSection>\n");
+  }
+
+  private void dumpINodeDirectory(INodeDirectory d) {
+    o("mtime", d.getModificationTime()).o("permission",
+        dumpPermission(d.getPermission()));
+
+    if (d.hasDsQuota() && d.hasNsQuota()) {
+      o("nsquota", d.getNsQuota()).o("dsquota", d.getDsQuota());
+    }
+  }
+
+  private void dumpINodeDirectorySection(InputStream in) throws IOException {
+    out.print("<INodeDirectorySection>");
+    while (true) {
+      INodeDirectorySection.DirEntry e = INodeDirectorySection.DirEntry
+          .parseDelimitedFrom(in);
+      // note that in is a LimitedInputStream
+      if (e == null) {
+        break;
+      }
+      out.print("<directory>");
+      o("parent", e.getParent());
+      for (long id : e.getChildrenList()) {
+        o("inode", id);
+      }
+      for (int i = 0; i < e.getNumOfRef(); i++) {
+        INodeSection.INodeReference r = INodeSection.INodeReference
+            .parseDelimitedFrom(in);
+        dumpINodeReference(r);
+
+      }
+      out.print("</directory>\n");
+    }
+    out.print("</INodeDirectorySection>\n");
+  }
+
+  private void dumpINodeReference(INodeSection.INodeReference r) {
+    out.print("<ref>");
+    o("referredId", r.getReferredId()).o("name", r.getName().toStringUtf8())
+        .o("dstSnapshotId", r.getDstSnapshotId())
+        .o("lastSnapshotId", r.getLastSnapshotId());
+    out.print("</ref>\n");
+  }
+
+  private void dumpINodeFile(INodeSection.INodeFile f) {
+    o("replication", f.getReplication()).o("mtime", f.getModificationTime())
+        .o("atime", f.getAccessTime())
+        .o("perferredBlockSize", f.getPreferredBlockSize())
+        .o("permission", dumpPermission(f.getPermission()));
+
+    if (f.getBlocksCount() > 0) {
+      out.print("<blocks>");
+      for (BlockProto b : f.getBlocksList()) {
+        out.print("<block>");
+        o("id", b.getBlockId()).o("genstamp", b.getGenStamp()).o("numBytes",
+            b.getNumBytes());
+        out.print("</block>\n");
+      }
+      out.print("</blocks>\n");
+    }
+
+    if (f.hasFileUC()) {
+      INodeSection.FileUnderConstructionFeature u = f.getFileUC();
+      out.print("<file-under-construction>");
+      o("clientName", u.getClientName()).o("clientMachine",
+          u.getClientMachine());
+      out.print("</file-under-construction>\n");
+    }
+  }
+
+  private void dumpINodeSection(InputStream in) throws IOException {
+    INodeSection s = INodeSection.parseDelimitedFrom(in);
+    out.print("<INodeSection>");
+    o("lastInodeId", s.getLastInodeId());
+    for (int i = 0; i < s.getNumInodes(); ++i) {
+      INodeSection.INode p = INodeSection.INode.parseDelimitedFrom(in);
+      out.print("<inode>");
+      o("id", p.getId()).o("type", p.getType()).o("name",
+          p.getName().toStringUtf8());
+
+      if (p.hasFile()) {
+        dumpINodeFile(p.getFile());
+      } else if (p.hasDirectory()) {
+        dumpINodeDirectory(p.getDirectory());
+      } else if (p.hasSymlink()) {
+        dumpINodeSymlink(p.getSymlink());
+      }
+
+      out.print("</inode>\n");
+    }
+    out.print("</INodeSection>\n");
+  }
+
+  private void dumpINodeSymlink(INodeSymlink s) {
+    o("permission", dumpPermission(s.getPermission())).o("target",
+        s.getTarget().toStringUtf8());
+  }
+
+  private void dumpNameSection(InputStream in) throws IOException {
+    NameSystemSection s = NameSystemSection.parseDelimitedFrom(in);
+    out.print("<NameSection>\n");
+    o("genstampV1", s.getGenstampV1()).o("genstampV2", s.getGenstampV2())
+        .o("genstampV1Limit", s.getGenstampV1Limit())
+        .o("lastAllocatedBlockId", s.getLastAllocatedBlockId())
+        .o("txid", s.getTransactionId());
+    out.print("<NameSection>\n");
+  }
+
+  private String dumpPermission(long permission) {
+    return FSImageFormatPBINode.Loader.loadPermission(permission, stringTable)
+        .toString();
+  }
+
+  private void dumpSecretManagerSection(InputStream is) throws IOException {
+    out.print("<SecretManagerSection>");
+    SecretManagerSection s = SecretManagerSection.parseDelimitedFrom(is);
+    o("currentId", s.getCurrentId()).o("tokenSequenceNumber",
+        s.getTokenSequenceNumber());
+    out.print("</SecretManagerSection>");
+  }
+
+  private void dumpSnapshotDiffSection(InputStream in) throws IOException {
+    out.print("<SnapshotDiffSection>");
+    while (true) {
+      SnapshotDiffSection.DiffEntry e = SnapshotDiffSection.DiffEntry
+          .parseDelimitedFrom(in);
+      if (e == null) {
+        break;
+      }
+      out.print("<diff>");
+      o("inodeid", e.getInodeId());
+      switch (e.getType()) {
+      case FILEDIFF: {
+        for (int i = 0; i < e.getNumOfDiff(); ++i) {
+          out.print("<filediff>");
+          SnapshotDiffSection.FileDiff f = SnapshotDiffSection.FileDiff
+              .parseDelimitedFrom(in);
+          o("snapshotId", f.getSnapshotId()).o("size", f.getFileSize()).o(
+              "name", f.getName().toStringUtf8());
+          out.print("</filediff>\n");
+        }
+      }
+        break;
+      case DIRECTORYDIFF: {
+        for (int i = 0; i < e.getNumOfDiff(); ++i) {
+          out.print("<dirdiff>");
+          SnapshotDiffSection.DirectoryDiff d = SnapshotDiffSection.DirectoryDiff
+              .parseDelimitedFrom(in);
+          o("snapshotId", d.getSnapshotId())
+              .o("isSnapshotroot", d.getIsSnapshotRoot())
+              .o("childrenSize", d.getChildrenSize())
+              .o("name", d.getName().toStringUtf8());
+
+          for (int j = 0; j < d.getCreatedListSize(); ++j) {
+            SnapshotDiffSection.CreatedListEntry ce = SnapshotDiffSection.CreatedListEntry
+                .parseDelimitedFrom(in);
+            out.print("<created>");
+            o("name", ce.getName().toStringUtf8());
+            out.print("</created>\n");
+          }
+          for (int j = 0; j < d.getNumOfDeletedRef(); ++j) {
+            INodeSection.INodeReference r = INodeSection.INodeReference
+                .parseDelimitedFrom(in);
+            dumpINodeReference(r);
+          }
+          out.print("</dirdiff>\n");
+        }
+      }
+        break;
+      default:
+        break;
+      }
+      out.print("</diff>");
+    }
+    out.print("<SnapshotDiffSection>\n");
+  }
+
+  private void dumpSnapshotSection(InputStream in) throws IOException {
+    out.print("<SnapshotSection>");
+    SnapshotSection s = SnapshotSection.parseDelimitedFrom(in);
+    o("snapshotCounter", s.getSnapshotCounter());
+    if (s.getSnapshottableDirCount() > 0) {
+      out.print("<snapshottableDir>");
+      for (long id : s.getSnapshottableDirList()) {
+        o("dir", id);
+      }
+      out.print("</snapshottableDir>\n");
+    }
+    for (int i = 0; i < s.getNumSnapshots(); ++i) {
+      SnapshotSection.Snapshot pbs = SnapshotSection.Snapshot
+          .parseDelimitedFrom(in);
+      o("snapshot", pbs.getSnapshotId());
+    }
+    out.print("</SnapshotSection>\n");
+  }
+
+  private void loadStringTable(InputStream in) throws IOException {
+    StringTableSection s = StringTableSection.parseDelimitedFrom(in);
+    stringTable = new String[s.getNumEntry() + 1];
+    for (int i = 0; i < s.getNumEntry(); ++i) {
+      StringTableSection.Entry e = StringTableSection.Entry
+          .parseDelimitedFrom(in);
+      stringTable[e.getId()] = e.getStr();
+    }
+  }
+
+  private PBImageXmlWriter o(final String e, final Object v) {
+    out.print("<" + e + ">" + v + "</" + e + ">");
+    return this;
+  }
+}

+ 10 - 29
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshot.java

@@ -25,6 +25,9 @@ import static org.junit.Assert.fail;
 
 import java.io.File;
 import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.RandomAccessFile;
+import java.io.StringWriter;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.EnumSet;
@@ -53,8 +56,7 @@ import org.apache.hadoop.hdfs.server.namenode.INode;
 import org.apache.hadoop.hdfs.server.namenode.INodeDirectory;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotTestHelper.TestDirectoryTree;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotTestHelper.TestDirectoryTree.Node;
-import org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer;
-import org.apache.hadoop.hdfs.tools.offlineImageViewer.XmlImageVisitor;
+import org.apache.hadoop.hdfs.tools.offlineImageViewer.PBImageXmlWriter;
 import org.apache.hadoop.ipc.RemoteException;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.util.Time;
@@ -245,8 +247,8 @@ public class TestSnapshot {
    * snapshots
    */
   @Test
-  public void testOfflineImageViewer() throws Throwable {
-    runTestSnapshot(SNAPSHOT_ITERATION_NUMBER);
+  public void testOfflineImageViewer() throws Exception {
+    runTestSnapshot(1);
     
     // retrieve the fsimage. Note that we already save namespace to fsimage at
     // the end of each iteration of runTestSnapshot.
@@ -254,31 +256,10 @@ public class TestSnapshot {
         FSImageTestUtil.getFSImage(
         cluster.getNameNode()).getStorage().getStorageDir(0));
     assertNotNull("Didn't generate or can't find fsimage", originalFsimage);
-    
-    String ROOT = System.getProperty("test.build.data", "build/test/data");
-    File testFile = new File(ROOT, "/image");
-    String xmlImage = ROOT + "/image_xml";
-    boolean success = false;
-    
-    try {
-      DFSTestUtil.copyFile(originalFsimage, testFile);
-      XmlImageVisitor v = new XmlImageVisitor(xmlImage, true);
-      OfflineImageViewer oiv = new OfflineImageViewer(testFile.getPath(), v,
-          true);
-      oiv.go();
-      success = true;
-    } finally {
-      if (testFile.exists()) {
-        testFile.delete();
-      }
-      // delete the xml file if the parsing is successful
-      if (success) {
-        File xmlImageFile = new File(xmlImage);
-        if (xmlImageFile.exists()) {
-          xmlImageFile.delete();
-        }
-      }
-    }
+    StringWriter output = new StringWriter();
+    PrintWriter o = new PrintWriter(output);
+    PBImageXmlWriter v = new PBImageXmlWriter(new Configuration(), o);
+    v.visit(new RandomAccessFile(originalFsimage, "r"));
   }
 
   private void runTestSnapshot(int iteration) throws Exception {

+ 109 - 322
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java

@@ -20,23 +20,20 @@ package org.apache.hadoop.hdfs.tools.offlineImageViewer;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
 
 import java.io.BufferedReader;
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.EOFException;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.FileReader;
 import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
+import java.io.PrintWriter;
+import java.io.RandomAccessFile;
+import java.io.StringWriter;
 import java.util.HashMap;
-import java.util.LinkedList;
-import java.util.List;
 import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -46,27 +43,29 @@ import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
 import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil;
+import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.test.PathUtils;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
+import org.junit.Rule;
 import org.junit.Test;
-
+import org.junit.rules.TemporaryFolder;
 
 /**
- * Test function of OfflineImageViewer by:
- *   * confirming it can correctly process a valid fsimage file and that
- *     the processing generates a correct representation of the namespace
- *   * confirming it correctly fails to process an fsimage file with a layout
- *     version it shouldn't be able to handle
- *   * confirm it correctly bails on malformed image files, in particular, a
- *     file that ends suddenly.
+ * Test function of OfflineImageViewer by: * confirming it can correctly process
+ * a valid fsimage file and that the processing generates a correct
+ * representation of the namespace * confirming it correctly fails to process an
+ * fsimage file with a layout version it shouldn't be able to handle * confirm
+ * it correctly bails on malformed image files, in particular, a file that ends
+ * suddenly.
  */
 public class TestOfflineImageViewer {
   private static final Log LOG = LogFactory.getLog(OfflineImageViewer.class);
@@ -76,22 +75,22 @@ public class TestOfflineImageViewer {
   private static File originalFsimage = null;
 
   // Elements of lines of ls-file output to be compared to FileStatus instance
-  private static class LsElements {
-    public String perms;
-    public int replication;
-    public String username;
-    public String groupname;
-    public long filesize;
-    public char dir; // d if dir, - otherwise
+  private static final class LsElements {
+    private String perms;
+    private int replication;
+    private String username;
+    private String groupname;
+    private long filesize;
+    private boolean isDir;
   }
-  
+
   // namespace as written to dfs, to be compared with viewer's output
-  final static HashMap<String, FileStatus> writtenFiles = 
-      new HashMap<String, FileStatus>();
-  
-  private static String ROOT = PathUtils.getTestDirName(TestOfflineImageViewer.class);
-  
-  // Create a populated namespace for later testing.  Save its contents to a
+  final static HashMap<String, FileStatus> writtenFiles = new HashMap<String, FileStatus>();
+
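+  // JUnit-managed scratch directory for per-test files (e.g. the truncated
+  // fsimage copy in testTruncatedFSImage); deleted automatically after each test.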
+  @Rule
+  public TemporaryFolder folder = new TemporaryFolder();
+
+  // Create a populated namespace for later testing. Save its contents to a
   // data structure and store its fsimage location.
   // We only want to generate the fsimage file once and use it for
   // multiple tests.
@@ -100,35 +99,39 @@ public class TestOfflineImageViewer {
     MiniDFSCluster cluster = null;
     try {
       Configuration conf = new HdfsConfiguration();
-      conf.setLong(DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY, 10000);
-      conf.setLong(DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY, 5000);
-      conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true);
+      conf.setLong(
+          DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY, 10000);
+      conf.setLong(
+          DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY, 5000);
+      conf.setBoolean(
+          DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true);
       conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTH_TO_LOCAL,
           "RULE:[2:$1@$0](JobTracker@.*FOO.COM)s/@.*//" + "DEFAULT");
       cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build();
       cluster.waitActive();
       FileSystem hdfs = cluster.getFileSystem();
-      
+
       int filesize = 256;
-      
-      // Create a reasonable namespace 
-      for(int i = 0; i < NUM_DIRS; i++)  {
+
+      // Create a reasonable namespace
+      for (int i = 0; i < NUM_DIRS; i++) {
         Path dir = new Path("/dir" + i);
         hdfs.mkdirs(dir);
         writtenFiles.put(dir.toString(), pathToFileEntry(hdfs, dir.toString()));
-        for(int j = 0; j < FILES_PER_DIR; j++) {
+        for (int j = 0; j < FILES_PER_DIR; j++) {
           Path file = new Path(dir, "file" + j);
           FSDataOutputStream o = hdfs.create(file);
-          o.write(new byte[ filesize++ ]);
+          o.write(new byte[filesize++]);
           o.close();
-          
-          writtenFiles.put(file.toString(), pathToFileEntry(hdfs, file.toString()));
+
+          writtenFiles.put(file.toString(),
+              pathToFileEntry(hdfs, file.toString()));
         }
       }
 
       // Get delegation tokens so we log the delegation token op
-      Token<?>[] delegationTokens = 
-          hdfs.addDelegationTokens(TEST_RENEWER, null);
+      Token<?>[] delegationTokens = hdfs
+          .addDelegationTokens(TEST_RENEWER, null);
       for (Token<?> t : delegationTokens) {
         LOG.debug("got token " + t);
       }
@@ -137,329 +140,113 @@ public class TestOfflineImageViewer {
       cluster.getNameNodeRpc()
           .setSafeMode(SafeModeAction.SAFEMODE_ENTER, false);
       cluster.getNameNodeRpc().saveNamespace();
-      
+
       // Determine location of fsimage file
-      originalFsimage = FSImageTestUtil.findLatestImageFile(
-          FSImageTestUtil.getFSImage(
-          cluster.getNameNode()).getStorage().getStorageDir(0));
+      originalFsimage = FSImageTestUtil.findLatestImageFile(FSImageTestUtil
+          .getFSImage(cluster.getNameNode()).getStorage().getStorageDir(0));
       if (originalFsimage == null) {
         throw new RuntimeException("Didn't generate or can't find fsimage");
       }
       LOG.debug("original FS image file is " + originalFsimage);
     } finally {
-      if(cluster != null)
+      if (cluster != null)
         cluster.shutdown();
     }
   }
-  
+
   @AfterClass
   public static void deleteOriginalFSImage() throws IOException {
-    if(originalFsimage != null && originalFsimage.exists()) {
+    if (originalFsimage != null && originalFsimage.exists()) {
       originalFsimage.delete();
     }
   }
-  
-  // Convenience method to generate a file status from file system for 
+
+  // Convenience method to generate a file status from file system for
   // later comparison
-  private static FileStatus pathToFileEntry(FileSystem hdfs, String file) 
-        throws IOException {
+  private static FileStatus pathToFileEntry(FileSystem hdfs, String file)
+      throws IOException {
     return hdfs.getFileStatus(new Path(file));
   }
-  
-  // Verify that we can correctly generate an ls-style output for a valid 
+
+  // Verify that we can correctly generate an ls-style output for a valid
   // fsimage
   @Test
   public void outputOfLSVisitor() throws IOException {
-    File testFile = new File(ROOT, "/basicCheck");
-    File outputFile = new File(ROOT, "/basicCheckOutput");
-    
-    try {
-      DFSTestUtil.copyFile(originalFsimage, testFile);
-      
-      ImageVisitor v = new LsImageVisitor(outputFile.getPath(), true);
-      OfflineImageViewer oiv = new OfflineImageViewer(testFile.getPath(), v, false);
-
-      oiv.go();
-      
-      HashMap<String, LsElements> fileOutput = readLsfile(outputFile);
-      
-      compareNamespaces(writtenFiles, fileOutput);
-    } finally {
-      if(testFile.exists()) testFile.delete();
-      if(outputFile.exists()) outputFile.delete();
-    }
-    LOG.debug("Correctly generated ls-style output.");
-  }
-  
-  // Confirm that attempting to read an fsimage file with an unsupported
-  // layout results in an error
-  @Test
-  public void unsupportedFSLayoutVersion() throws IOException {
-    File testFile = new File(ROOT, "/invalidLayoutVersion");
-    File outputFile = new File(ROOT, "invalidLayoutVersionOutput");
-    
-    try {
-      int badVersionNum = -432;
-      changeLayoutVersion(originalFsimage, testFile, badVersionNum);
-      ImageVisitor v = new LsImageVisitor(outputFile.getPath(), true);
-      OfflineImageViewer oiv = new OfflineImageViewer(testFile.getPath(), v, false);
-      
-      try {
-        oiv.go();
-        fail("Shouldn't be able to read invalid laytout version");
-      } catch(IOException e) {
-        if(!e.getMessage().contains(Integer.toString(badVersionNum)))
-          throw e; // wasn't error we were expecting
-        LOG.debug("Correctly failed at reading bad image version.");
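+    // Run the protobuf-based lsr visitor over the fsimage, capturing its
+    // listing in memory so it can be compared against the namespace written
+    // to HDFS during class setup.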
+    StringWriter output = new StringWriter();
+    PrintWriter out = new PrintWriter(output);
+    LsrPBImage v = new LsrPBImage(new Configuration(), out);
+    v.visit(new RandomAccessFile(originalFsimage, "r"));
+    out.close();
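+    // Each line of the lsr-style output is matched against this pattern; the
+    // capture groups are: (1) type flag, (2) permission string, (3) replication
+    // (or "-" for directories), (4) owner, (5) group, (6) and (7) numeric
+    // fields, of which (7) is read as the length, and (8) the path.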
+    Pattern pattern = Pattern
+        .compile("([d\\-])([rwx\\-]{9})\\s*(-|\\d+)\\s*(\\w+)\\s*(\\w+)\\s*(\\d+)\\s*(\\d+)\\s*([\b/]+)");
+    int count = 0;
+    for (String s : output.toString().split("\n")) {
+      Matcher m = pattern.matcher(s);
+      assertTrue(m.find());
+      LsElements e = new LsElements();
+      e.isDir = m.group(1).equals("d");
+      e.perms = m.group(2);
+      e.replication = m.group(3).equals("-") ? 0 : Integer.parseInt(m.group(3));
+      e.username = m.group(4);
+      e.groupname = m.group(5);
+      e.filesize = Long.parseLong(m.group(7));
+      String path = m.group(8);
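+      // The root directory appears in the listing but was never recorded in
+      // writtenFiles, so skip the comparison and account for it in the count.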
+      if (!path.equals("/")) {
+        compareFiles(writtenFiles.get(path), e);
       }
-    } finally {
-      if(testFile.exists()) testFile.delete();
-      if(outputFile.exists()) outputFile.delete();
+      ++count;
     }
+    assertEquals(writtenFiles.size() + 1, count);
   }
-  
-  // Verify that image viewer will bail on a file that ends unexpectedly
-  @Test
-  public void truncatedFSImage() throws IOException {
-    File testFile = new File(ROOT, "/truncatedFSImage");
-    File outputFile = new File(ROOT, "/trucnatedFSImageOutput");
-    try {
-      copyPartOfFile(originalFsimage, testFile);
-      assertTrue("Created truncated fsimage", testFile.exists());
-      
-      ImageVisitor v = new LsImageVisitor(outputFile.getPath(), true);
-      OfflineImageViewer oiv = new OfflineImageViewer(testFile.getPath(), v, false);
-
-      try {
-        oiv.go();
-        fail("Managed to process a truncated fsimage file");
-      } catch (EOFException e) {
-        LOG.debug("Correctly handled EOF");
-      }
 
-    } finally {
-      if(testFile.exists()) testFile.delete();
-      if(outputFile.exists()) outputFile.delete();
-    }
+  @Test(expected = IOException.class)
+  public void testTruncatedFSImage() throws IOException {
+    File truncatedFile = folder.newFile();
+    StringWriter output = new StringWriter();
+    copyPartOfFile(originalFsimage, truncatedFile);
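+    // Visiting the truncated copy must fail with the IOException declared in
+    // the @Test annotation rather than producing partial results.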
+    new FileDistributionCalculator(new Configuration(), 0, 0, new PrintWriter(
+        output)).visit(new RandomAccessFile(truncatedFile, "r"));
   }
-  
-  // Test that our ls file has all the same compenents of the original namespace
-  private void compareNamespaces(HashMap<String, FileStatus> written,
-      HashMap<String, LsElements> fileOutput) {
-    assertEquals( "Should be the same number of files in both, plus one for root"
-            + " in fileoutput", fileOutput.keySet().size(), 
-                                written.keySet().size() + 1);
-    Set<String> inFile = fileOutput.keySet();
 
-    // For each line in the output file, verify that the namespace had a
-    // filestatus counterpart 
-    for (String path : inFile) {
-      if (path.equals("/")) // root's not included in output from system call
-        continue;
-
-      assertTrue("Path in file (" + path + ") was written to fs", written
-          .containsKey(path));
-      
-      compareFiles(written.get(path), fileOutput.get(path));
-      
-      written.remove(path);
-    }
-
-    assertEquals("No more files were written to fs", 0, written.size());
-  }
-  
   // Compare two files as listed in the original namespace FileStatus and
   // the output of the ls file from the image processor
   private void compareFiles(FileStatus fs, LsElements elements) {
-    assertEquals("directory listed as such",  
-                 fs.isDirectory() ? 'd' : '-', elements.dir);
-    assertEquals("perms string equal", 
-                                fs.getPermission().toString(), elements.perms);
+    assertEquals("directory listed as such", fs.isDirectory(), elements.isDir);
+    assertEquals("perms string equal", fs.getPermission().toString(),
+        elements.perms);
     assertEquals("replication equal", fs.getReplication(), elements.replication);
     assertEquals("owner equal", fs.getOwner(), elements.username);
     assertEquals("group equal", fs.getGroup(), elements.groupname);
     assertEquals("lengths equal", fs.getLen(), elements.filesize);
   }
 
-  // Read the contents of the file created by the Ls processor
-  private HashMap<String, LsElements> readLsfile(File lsFile) throws IOException {
-    BufferedReader br = new BufferedReader(new FileReader(lsFile));
-    String line = null;
-    HashMap<String, LsElements> fileContents = new HashMap<String, LsElements>();
-    
-    while((line = br.readLine()) != null) 
-      readLsLine(line, fileContents);
-    
-    br.close();
-    return fileContents;
-  }
-  
-  // Parse a line from the ls output.  Store permissions, replication, 
-  // username, groupname and filesize in hashmap keyed to the path name
-  private void readLsLine(String line, HashMap<String, LsElements> fileContents) {
-    String elements [] = line.split("\\s+");
-    
-    assertEquals("Not enough elements in ls output", 8, elements.length);
-    
-    LsElements lsLine = new LsElements();
-    
-    lsLine.dir = elements[0].charAt(0);
-    lsLine.perms = elements[0].substring(1);
-    lsLine.replication = elements[1].equals("-") 
-                                             ? 0 : Integer.valueOf(elements[1]);
-    lsLine.username = elements[2];
-    lsLine.groupname = elements[3];
-    lsLine.filesize = Long.valueOf(elements[4]);
-    // skipping date and time 
-    
-    String path = elements[7];
-    
-    // Check that each file in the ls output was listed once
-    assertFalse("LS file had duplicate file entries", 
-        fileContents.containsKey(path));
-    
-    fileContents.put(path, lsLine);
-  }
-  
-  // Copy one fsimage to another, changing the layout version in the process
-  private void changeLayoutVersion(File src, File dest, int newVersion) 
-         throws IOException {
-    DataInputStream in = null; 
-    DataOutputStream out = null; 
-    
-    try {
-      in = new DataInputStream(new FileInputStream(src));
-      out = new DataOutputStream(new FileOutputStream(dest));
-      
-      in.readInt();
-      out.writeInt(newVersion);
-      
-      byte [] b = new byte[1024];
-      while( in.read(b)  > 0 ) {
-        out.write(b);
-      }
-    } finally {
-      if(in != null) in.close();
-      if(out != null) out.close();
-    }
-  }
-  
-  // Only copy part of file into the other.  Used for testing truncated fsimage
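+  // Copy only the first MAX_BYTES bytes of src into dest, producing a
+  // truncated fsimage for testTruncatedFSImage.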
   private void copyPartOfFile(File src, File dest) throws IOException {
-    InputStream in = null;
-    OutputStream out = null;
-    
-    byte [] b = new byte[256];
-    int bytesWritten = 0;
-    int count;
-    int maxBytes = 700;
-    
+    FileInputStream in = null;
+    FileOutputStream out = null;
+    final int MAX_BYTES = 700;
     try {
       in = new FileInputStream(src);
       out = new FileOutputStream(dest);
-      
-      while( (count = in.read(b))  > 0 && bytesWritten < maxBytes ) {
-        out.write(b);
-        bytesWritten += count;
-      } 
+      in.getChannel().transferTo(0, MAX_BYTES, out.getChannel());
     } finally {
-      if(in != null) in.close();
-      if(out != null) out.close();
+      IOUtils.cleanup(null, in);
+      IOUtils.cleanup(null, out);
     }
   }
 
   @Test
-  public void outputOfFileDistributionVisitor() throws IOException {
-    File testFile = new File(ROOT, "/basicCheck");
-    File outputFile = new File(ROOT, "/fileDistributionCheckOutput");
-
-    int totalFiles = 0;
-    BufferedReader reader = null;
-    try {
-      DFSTestUtil.copyFile(originalFsimage, testFile);
-      ImageVisitor v = new FileDistributionVisitor(outputFile.getPath(), 0, 0);
-      OfflineImageViewer oiv = 
-        new OfflineImageViewer(testFile.getPath(), v, false);
-
-      oiv.go();
-
-      reader = new BufferedReader(new FileReader(outputFile));
-      String line = reader.readLine();
-      assertEquals(line, "Size\tNumFiles");
-      while((line = reader.readLine()) != null) {
-        String[] row = line.split("\t");
-        assertEquals(row.length, 2);
-        totalFiles += Integer.parseInt(row[1]);
-      }
-    } finally {
-      if (reader != null) {
-        reader.close();
-      }
-      if(testFile.exists()) testFile.delete();
-      if(outputFile.exists()) outputFile.delete();
-    }
+  public void testFileDistributionVisitor() throws IOException {
+    StringWriter output = new StringWriter();
+    PrintWriter o = new PrintWriter(output);
+    new FileDistributionCalculator(new Configuration(), 0, 0, o)
+        .visit(new RandomAccessFile(originalFsimage, "r"));
+    o.close();
+
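+    // The calculator's summary includes a "totalFiles = N" line; extract N and
+    // check it matches the number of files written during class setup.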
+    Pattern p = Pattern.compile("totalFiles = (\\d+)\n");
+    Matcher matcher = p.matcher(output.getBuffer());
+
+    assertTrue(matcher.find() && matcher.groupCount() == 1);
+    int totalFiles = Integer.parseInt(matcher.group(1));
     assertEquals(totalFiles, NUM_DIRS * FILES_PER_DIR);
   }
-  
-  private static class TestImageVisitor extends ImageVisitor {
-    private List<String> delegationTokenRenewers = new LinkedList<String>();
-    TestImageVisitor() {
-    }
-    
-    List<String> getDelegationTokenRenewers() {
-      return delegationTokenRenewers;
-    }
-
-    @Override
-    void start() throws IOException {
-    }
-
-    @Override
-    void finish() throws IOException {
-    }
-
-    @Override
-    void finishAbnormally() throws IOException {
-    }
-
-    @Override
-    void visit(ImageElement element, String value) throws IOException {
-      if (element == ImageElement.DELEGATION_TOKEN_IDENTIFIER_RENEWER) {
-        delegationTokenRenewers.add(value);
-      }
-    }
-
-    @Override
-    void visitEnclosingElement(ImageElement element) throws IOException {
-    }
-
-    @Override
-    void visitEnclosingElement(ImageElement element, ImageElement key,
-        String value) throws IOException {
-    }
-
-    @Override
-    void leaveEnclosingElement() throws IOException {
-    }
-  }
-
-  @Test
-  public void outputOfTestVisitor() throws IOException {
-    File testFile = new File(ROOT, "/basicCheck");
-
-    try {
-      DFSTestUtil.copyFile(originalFsimage, testFile);
-      TestImageVisitor v = new TestImageVisitor();
-      OfflineImageViewer oiv = new OfflineImageViewer(testFile.getPath(), v, true);
-      oiv.go();
-
-      // Validated stored delegation token identifiers.
-      List<String> dtrs = v.getDelegationTokenRenewers();
-      assertEquals(1, dtrs.size());
-      assertEquals(TEST_RENEWER, dtrs.get(0));
-    } finally {
-      if(testFile.exists()) testFile.delete();
-    }
-    LOG.debug("Passed TestVisitor validation.");
-  }
 }