Prechádzať zdrojové kódy

HDFS-5633. Improve OfflineImageViewer to use less memory. Contributed by Jing Zhao.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1548359 13f79535-47bb-0310-9956-ffa450edef68
Jing Zhao 11 rokov pred
rodič
commit
6b459506c3

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -576,6 +576,8 @@ Release 2.4.0 - UNRELEASED
     HDFS-5581. NameNodeFsck should use only one instance of
     BlockPlacementPolicy. (vinay via cmccabe)
 
+    HDFS-5633. Improve OfflineImageViewer to use less memory. (jing9)
+
   OPTIMIZATIONS
 
     HDFS-5239.  Allow FSNamesystem lock fairness to be configurable (daryn)

+ 12 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionVisitor.java

@@ -100,6 +100,18 @@ class FileDistributionVisitor extends TextWriterImageVisitor {
 
   @Override
   void finish() throws IOException {
+    output();
+    super.finish();
+  }
+
+  @Override
+  void finishAbnormally() throws IOException {
+    System.out.println("*** Image processing finished abnormally.  Ending ***");
+    output();
+    super.finishAbnormally();
+  }
+
+  private void output() throws IOException {
     // write the distribution into the output file
     write("Size\tNumFiles\n");
     for(int i = 0; i < distribution.length; i++)
@@ -109,7 +121,6 @@ class FileDistributionVisitor extends TextWriterImageVisitor {
     System.out.println("totalBlocks = " + totalBlocks);
     System.out.println("totalSpace = " + totalSpace);
     System.out.println("maxFileSize = " + maxFileSize);
-    super.finish();
   }
 
   @Override

+ 17 - 6
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java

@@ -129,7 +129,7 @@ class ImageLoaderCurrent implements ImageLoader {
       -40, -41, -42, -43, -44, -45, -46, -47, -48 };
   private int imageVersion = 0;
   
-  private final Map<Long, String> subtreeMap = new HashMap<Long, String>();
+  private final Map<Long, Boolean> subtreeMap = new HashMap<Long, Boolean>();
   private final Map<Long, String> dirNodeMap = new HashMap<Long, String>();
 
   /* (non-Javadoc)
@@ -500,11 +500,15 @@ class ImageLoaderCurrent implements ImageLoader {
     // 1. load dir node id
     long inodeId = in.readLong();
     
-    String dirName = dirNodeMap.get(inodeId);
-    String oldValue = subtreeMap.put(inodeId, dirName);
-    if (oldValue != null) { // the subtree has been visited
-      return;
-    }
+    String dirName = dirNodeMap.remove(inodeId);
+    Boolean visitedRef = subtreeMap.get(inodeId);
+    if (visitedRef != null) {
+      if (visitedRef.booleanValue()) { // the subtree has been visited
+        return;
+      } else { // first time to visit
+        subtreeMap.put(inodeId, true);
+      }
+    } // else the dir is not linked by a RefNode, thus cannot be revisited
     
     // 2. load possible snapshots
     processSnapshots(in, v, dirName);
@@ -695,6 +699,8 @@ class ImageLoaderCurrent implements ImageLoader {
     
     if (numBlocks >= 0) { // File
       if (supportSnapshot) {
+        // make sure subtreeMap only contains entry for directory
+        subtreeMap.remove(inodeId);
         // process file diffs
         processFileDiffList(in, v, parentName);
         if (isSnapshotCopy) {
@@ -738,6 +744,11 @@ class ImageLoaderCurrent implements ImageLoader {
       
       final boolean firstReferred = in.readBoolean();
       if (firstReferred) {
+        // if a subtree is linked by multiple "parents", the corresponding dir
+        // must be referred by a reference node. we put the reference node into
+        // the subtreeMap here and let its value be false. when we later visit
+        // the subtree for the first time, we change the value to true.
+        subtreeMap.put(inodeId, false);
         v.visitEnclosingElement(ImageElement.SNAPSHOT_REF_INODE);
         processINode(in, v, skipBlocks, parentName, isSnapshotCopy);
         v.leaveEnclosingElement();  // referred inode