
HADOOP-1838. The blocksize of files created with an earlier release is
set to the default block size. (Dhruba Borthakur via dhruba)



git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@573413 13f79535-47bb-0310-9956-ffa450edef68

Dhruba Borthakur · 18 years ago · commit a978c7e2aa
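
In brief: when loading an image or edit log written at layout version -8 or earlier, a file whose inode carries no persisted block size gets one inferred from its blocks. A minimal sketch of that rule, as a hypothetical helper class (the real logic lives inline in the FSEditLog and FSImage loaders shown below, gated on blockSize == 0):

    // Sketch of the backfill rule this commit applies during upgrade.
    // Hypothetical helper; names here are illustrative, not from the patch.
    class BlockSizeBackfill {
      static long inferBlockSize(long[] blockBytes, long defaultBlockSize) {
        if (blockBytes.length > 1) {
          // Multi-block file: every block except the last is full-size,
          // so the first block's length is the file's block size.
          return blockBytes[0];
        }
        // Zero or one block: fall back to the configured default, but never
        // report a block size smaller than the lone block actually is.
        long first = (blockBytes.length == 1) ? blockBytes[0] : 0;
        return Math.max(defaultBlockSize, first);
      }
    }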

+ 3 - 0
CHANGES.txt

@@ -19,6 +19,9 @@ Trunk (unreleased changes)
     HADOOP-1656.  The blockSize of a file is stored persistently in the file
     inode. (Dhruba Borthakur via dhruba)
 
+    HADOOP-1838.  The blocksize of files created with an earlier release is
+    set to the default block size.  (Dhruba Borthakur via dhruba)
+
   NEW FEATURES
 
     HADOOP-1636.  Allow configuration of the number of jobs kept in

+ 2 - 2
src/java/org/apache/hadoop/dfs/FSConstants.java

@@ -156,7 +156,7 @@ public interface FSConstants {
   // Version is reflected in the data storage file.
   // Versions are negative.
   // Decrement LAYOUT_VERSION to define a new version.
-  public static final int LAYOUT_VERSION = -8;
+  public static final int LAYOUT_VERSION = -9;
   // Current version: 
-  // added blocksize to inode
+  // files with one block use the default blocksize
 }
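
For context on the gate used in the loaders below: layout versions are negative and decrement with each format change, so a check like "-8 <= logVersion" is true exactly for data written at version -8 or any earlier release. A one-line sketch of the convention (variable names assumed):

    // logVersion: layout version the on-disk data was written with.
    // -8 <= logVersion matches -8, -7, ..., i.e. every release before -9.
    boolean needsBlockSizeBackfill = (-8 <= logVersion);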

+ 8 - 5
src/java/org/apache/hadoop/dfs/FSEditLog.java

@@ -27,6 +27,7 @@ import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.lang.Math;
 
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.io.UTF8;
@@ -282,15 +283,17 @@ class FSEditLog {
 
            // Older versions of HDFS do not store the block size in the inode.
             // If the file has more than one block, use the size of the
-            // first block as the blocksize. Otherwise leave the blockSize as 0
-            // to indicate that we do not really know the "true" blocksize of 
-            // this file.
-            if (-7 <= logVersion) {
-              assert blockSize == 0;
+            // first block as the blocksize. Otherwise use the default
+            // block size.
+            if (-8 <= logVersion && blockSize == 0) {
               if (blocks.length > 1) {
                 blockSize = blocks[0].getNumBytes();
+              } else {
+                long first = ((blocks.length == 1)? blocks[0].getNumBytes(): 0);
+                blockSize = Math.max(fsNamesys.getDefaultBlockSize(), first);
               }
             }
+
             // add to the file tree
             fsDir.unprotectedAddFile(name.toString(), blocks, replication, 
                                      mtime, blockSize);
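
A quick spot-check of the replay rule with concrete sizes, using the BlockSizeBackfill sketch from earlier (values assumed; run with "java -ea" so the asserts fire):

    // Self-contained check of the three interesting cases, 64 MB default.
    public class BlockSizeBackfillCheck {
      public static void main(String[] args) {
        long mb = 1024L * 1024, dflt = 64 * mb;
        // Multi-block file: first block's length wins.
        assert BlockSizeBackfill.inferBlockSize(new long[]{64 * mb, 64 * mb, 10 * mb}, dflt) == 64 * mb;
        // Small single block: the default wins.
        assert BlockSizeBackfill.inferBlockSize(new long[]{10 * mb}, dflt) == dflt;
        // Oversized single block: the block wins.
        assert BlockSizeBackfill.inferBlockSize(new long[]{128 * mb}, dflt) == 128 * mb;
        // Empty file: the default.
        assert BlockSizeBackfill.inferBlockSize(new long[]{}, dflt) == dflt;
        System.out.println("all cases pass");
      }
    }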

+ 7 - 5
src/java/org/apache/hadoop/dfs/FSImage.java

@@ -35,6 +35,7 @@ import java.util.Iterator;
 import java.util.Map;
 import java.util.Properties;
 import java.util.Random;
+import java.lang.Math;
 
 import org.apache.hadoop.dfs.FSConstants.StartupOption;
 import org.apache.hadoop.dfs.FSConstants.NodeType;
@@ -693,13 +694,14 @@ class FSImage extends Storage {
         }
        // Older versions of HDFS do not store the block size in the inode.
         // If the file has more than one block, use the size of the 
-        // first block as the blocksize. Otherwise leave the blockSize as 0
-        // to indicate that we do not really know the "true" blocksize of this
-        // file.
-        if (-7 <= imgVersion) {
-          assert blockSize == 0;
+        // first block as the blocksize. Otherwise use the default block size.
+        //
+        if (-8 <= imgVersion && blockSize == 0) {
           if (numBlocks > 1) {
             blockSize = blocks[0].getNumBytes();
+          } else {
+            long first = ((numBlocks == 1) ? blocks[0].getNumBytes(): 0);
+            blockSize = Math.max(fsNamesys.getDefaultBlockSize(), first);
           }
         }
         fsDir.unprotectedAddFile(name.toString(), blocks, replication,
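
The FSImage loader applies the identical rule when reading a saved image. The Math.max guard is what keeps a single-block file written under a larger per-file block size from being clamped down to the cluster default. Illustrative values (assumed, not from the patch):

    // Single-block file written with a 128 MB block size; cluster default 64 MB.
    long defaultBlockSize = 64L * 1024 * 1024;
    long first = 128L * 1024 * 1024;                    // lone block's length
    long blockSize = Math.max(defaultBlockSize, first); // 128 MB, not 64 MB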

+ 9 - 2
src/java/org/apache/hadoop/dfs/FSNamesystem.java

@@ -182,6 +182,8 @@ class FSNamesystem implements FSConstants {
   private long replicationRecheckInterval;
   // decommissionRecheckInterval is how often the namenode checks whether a node has finished decommissioning
   private long decommissionRecheckInterval;
+  // default block size of a file
+  private long defaultBlockSize = 0;
   static int replIndex = 0; // last datanode used for replication work
   static int REPL_WORK_PER_ITERATION = 32; // max percent datanodes per iteration
 
@@ -200,7 +202,7 @@ class FSNamesystem implements FSConstants {
   private Daemon dnthread = null;
 
   /**
-   * dirs is a list oif directories where the filesystem directory state 
+   * dirs is a list of directories where the filesystem directory state 
    * is stored
    */
   public FSNamesystem(String hostname,
@@ -306,6 +308,7 @@ class FSNamesystem implements FSConstants {
     this.decommissionRecheckInterval = conf.getInt(
                                                    "dfs.namenode.decommission.interval",
                                                    5 * 60 * 1000);    
+    this.defaultBlockSize = conf.getLong("dfs.block.size", DEFAULT_BLOCK_SIZE);
   }
 
   /** Return the FSNamesystem object
@@ -314,7 +317,7 @@ class FSNamesystem implements FSConstants {
   public static FSNamesystem getFSNamesystem() {
     return fsNamesystemObject;
   } 
-    
+
   NamespaceInfo getNamespaceInfo() {
     return new NamespaceInfo(dir.fsImage.getNamespaceID(),
                              dir.fsImage.getCTime(),
@@ -402,6 +405,10 @@ class FSNamesystem implements FSConstants {
     out.flush();
     out.close();
   }
+
+  long getDefaultBlockSize() {
+    return defaultBlockSize;
+  }
     
   /* get replication factor of a block */
   private int getReplication(Block block) {
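
Finally, the new defaultBlockSize field is read once at startup from the dfs.block.size key, falling back to FSConstants.DEFAULT_BLOCK_SIZE, and exposed through the getDefaultBlockSize() accessor that the loaders above call. A minimal sketch of the same lookup (the class name and the 64 MB literal standing in for the constant are assumptions):

    import org.apache.hadoop.conf.Configuration;

    public class DefaultBlockSizeLookup {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // A site override, as hadoop-site.xml would set it: 128 MB.
        conf.set("dfs.block.size", "134217728");
        // Mirrors the patch: this is the value getDefaultBlockSize() reports.
        long blockSize = conf.getLong("dfs.block.size", 64L * 1024 * 1024);
        System.out.println("default block size = " + blockSize);
      }
    }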