
HADOOP-1656. The blockSize of a file is stored persistently
in the file inode. (Dhruba Borthakur via dhruba)



git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@571247 13f79535-47bb-0310-9956-ffa450edef68

Dhruba Borthakur · 18 years ago · commit fd4d43ff88

+ 3 - 0
CHANGES.txt

@@ -16,6 +16,9 @@ Trunk (unreleased changes)
     HADOOP-1621.  FileStatus is now a concrete class and FileSystem.listPaths
     is deprecated and replaced with listStatus. (Chris Douglas via omalley)
 
+    HADOOP-1656.  The blockSize of a file is stored persistently in the file
+    inode. (Dhruba Borthakur via dhruba)
+
   NEW FEATURES
 
     HADOOP-1636.  Allow configuration of the number of jobs kept in
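
Reviewer note: the motivation is easiest to see in the INode.java hunk further down. Before this patch the blocksize was inferred from the first block of a file, which loses information for an empty file and for a file whose only block is partial. A minimal, runnable sketch of the old inference, using simplified stand-ins for the real classes:

    // Sketch only: simplified stand-ins, not the real dfs.Block/INodeFile.
    class Block {
      private final long numBytes;
      Block(long numBytes) { this.numBytes = numBytes; }
      long getNumBytes() { return numBytes; }
    }

    class OldStyleFile {
      private final Block[] blocks;
      OldStyleFile(Block[] blocks) { this.blocks = blocks; }

      // Pre-HADOOP-1656 behavior: derive the blocksize from the first block.
      long getBlockSize() {
        if (blocks == null || blocks.length == 0) {
          return 0; // empty file: the configured blocksize is simply lost
        }
        return blocks[0].getNumBytes(); // wrong if the only block is partial
      }
    }

    public class BlockSizeDemo {
      public static void main(String[] args) {
        // A file created with a 64 MB blocksize but holding only 1 MB of data:
        OldStyleFile f = new OldStyleFile(new Block[] { new Block(1L << 20) });
        System.out.println(f.getBlockSize()); // prints 1048576, not 67108864
      }
    }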

+ 3 - 2
src/java/org/apache/hadoop/dfs/ClientProtocol.java

@@ -32,8 +32,9 @@ interface ClientProtocol extends VersionedProtocol {
   /**
    * Compared to the previous version the following changes have been introduced:
    * 16 : removed deprecated obtainLock() and releaseLock(). 
+   * 17 : getBlockSize replaced by getPreferredBlockSize
    */
-  public static final long versionID = 16L;
+  public static final long versionID = 17L;
   
   ///////////////////////////////////////
   // File contents
@@ -248,7 +249,7 @@ interface ClientProtocol extends VersionedProtocol {
    * @return The number of bytes in each block
    * @throws IOException
    */
-  public long getBlockSize(String filename) throws IOException;
+  public long getPreferredBlockSize(String filename) throws IOException;
 
   /**
    * Enter, leave or get safe mode.
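
Reviewer note: renaming an RPC method is an incompatible wire change, hence the versionID bump from 16 to 17; a client built against version 16 is rejected at the protocol handshake instead of failing later on a missing method. A hedged sketch of the shape of that check (illustrative names, not the actual org.apache.hadoop.ipc code):

    import java.io.IOException;

    // Illustrative only: how a versioned-protocol handshake can fail fast.
    final class VersionCheckDemo {
      static void checkVersion(long clientVersion, long serverVersion)
          throws IOException {
        if (clientVersion != serverVersion) {
          throw new IOException("Protocol version mismatch: client "
              + clientVersion + " vs. server " + serverVersion);
        }
      }

      public static void main(String[] args) {
        try {
          checkVersion(17L, 17L); // matching versions: the call proceeds
          checkVersion(16L, 17L); // stale client: rejected up front
        } catch (IOException e) {
          System.out.println(e.getMessage());
        }
      }
    }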

+ 2 - 2
src/java/org/apache/hadoop/dfs/DFSClient.java

@@ -134,7 +134,7 @@ class DFSClient implements FSConstants {
     methodNameToPolicyMap.put("renewLease", methodPolicy);
     methodNameToPolicyMap.put("getStats", methodPolicy);
     methodNameToPolicyMap.put("getDatanodeReport", methodPolicy);
-    methodNameToPolicyMap.put("getBlockSize", methodPolicy);
+    methodNameToPolicyMap.put("getPreferredBlockSize", methodPolicy);
     methodNameToPolicyMap.put("getEditLogSize", methodPolicy);
     methodNameToPolicyMap.put("complete", methodPolicy);
     methodNameToPolicyMap.put("getEditLogSize", methodPolicy);
@@ -212,7 +212,7 @@ class DFSClient implements FSConstants {
     
   public long getBlockSize(String f) throws IOException {
     try {
-      return namenode.getBlockSize(f);
+      return namenode.getPreferredBlockSize(f);
     } catch (IOException ie) {
       LOG.warn("Problem getting block size: " + 
           StringUtils.stringifyException(ie));
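
Reviewer note: the retry-policy table in DFSClient is keyed by method-name strings, so the rename must be mirrored there or getPreferredBlockSize would silently fall back to the default policy. A small sketch of that failure mode (the policy values here are placeholders):

    import java.util.HashMap;
    import java.util.Map;

    // Illustrative only: a string-keyed policy map must track method renames.
    final class PolicyMapDemo {
      public static void main(String[] args) {
        Map<String, String> methodNameToPolicyMap = new HashMap<>();
        methodNameToPolicyMap.put("getPreferredBlockSize", "retryPolicy");

        // A lookup under the stale name misses; the patch keeps the map key
        // and the method name in sync.
        System.out.println(methodNameToPolicyMap.get("getBlockSize"));          // null
        System.out.println(methodNameToPolicyMap.get("getPreferredBlockSize")); // retryPolicy
      }
    }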

+ 1 - 1
src/java/org/apache/hadoop/dfs/DFSFileInfo.java

@@ -51,7 +51,7 @@ class DFSFileInfo extends FileStatus {
     super(node.computeContentsLength(),
           node.isDirectory(), 
           node.isDirectory() ? 0 : ((INodeFile)node).getReplication(), 
-          node.isDirectory() ? 0 : ((INodeFile)node).getBlockSize(),
+          node.isDirectory() ? 0 : ((INodeFile)node).getPreferredBlockSize(),
           node.getModificationTime(), new Path(path));
   }
 

+ 2 - 2
src/java/org/apache/hadoop/dfs/FSConstants.java

@@ -156,7 +156,7 @@ public interface FSConstants {
   // Version is reflected in the data storage file.
   // Versions are negative.
   // Decrement LAYOUT_VERSION to define a new version.
-  public static final int LAYOUT_VERSION = -7;
+  public static final int LAYOUT_VERSION = -8;
   // Current version: 
-  // Block Level CRCs added.
+  // added blocksize to inode
 }
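
Reviewer note: LAYOUT_VERSION is negative and decremented on every on-disk format change, so "at least as new as version N" is written with <=. That is why the FSImage hunk below reads the new field only when imgVersion <= -8. A small sketch of the convention:

    // Illustrative only: the negative, decreasing LAYOUT_VERSION convention.
    final class LayoutVersionDemo {
      static boolean storesBlockSizeInInode(int imgVersion) {
        // More negative means newer; -8 is the first version with the field.
        return imgVersion <= -8;
      }

      public static void main(String[] args) {
        System.out.println(storesBlockSizeInInode(-7)); // false: pre-patch image
        System.out.println(storesBlockSizeInInode(-8)); // true: this patch
      }
    }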

+ 12 - 7
src/java/org/apache/hadoop/dfs/FSDirectory.java

@@ -114,7 +114,8 @@ class FSDirectory implements FSConstants {
   /**
    * Add the given filename to the fs.
    */
-  public boolean addFile(String path, Block[] blocks, short replication) {
+  public boolean addFile(String path, Block[] blocks, short replication,
+                         long preferredBlockSize) {
     waitForReady();
 
     // Always do an implicit mkdirs for parent directory tree.
@@ -122,7 +123,9 @@ class FSDirectory implements FSConstants {
     if (!mkdirs(new Path(path).getParent().toString(), modTime)) {
       return false;
     }
-    INodeFile newNode = (INodeFile)unprotectedAddFile(path, blocks, replication, modTime);
+    INodeFile newNode = (INodeFile)unprotectedAddFile(path, blocks, replication,
+                                                      modTime, 
+                                                      preferredBlockSize);
     if (newNode == null) {
       NameNode.stateChangeLog.info("DIR* FSDirectory.addFile: "
                                    +"failed to add "+path+" with "
@@ -141,12 +144,14 @@ class FSDirectory implements FSConstants {
   INode unprotectedAddFile( String path, 
                             Block[] blocks, 
                             short replication,
-                            long modificationTime) {
+                            long modificationTime,
+                            long preferredBlockSize) {
     INode newNode;
     if (blocks == null)
       newNode = new INodeDirectory(modificationTime);
     else
-      newNode = new INodeFile(blocks, replication, modificationTime);
+      newNode = new INodeFile(blocks, replication, modificationTime,
+                              preferredBlockSize);
     synchronized (rootDir) {
       try {
         newNode = rootDir.addNode(path, newNode);
@@ -304,10 +309,10 @@ class FSDirectory implements FSConstants {
   /**
    * Get the blocksize of a file
    * @param filename the filename
-   * @return the number of bytes in the first block
+   * @return the preferred block size of the file in bytes
    * @throws IOException if it is a directory or does not exist.
    */
-  public long getBlockSize(String filename) throws IOException {
+  public long getPreferredBlockSize(String filename) throws IOException {
     synchronized (rootDir) {
       INode fileNode = rootDir.getNode(filename);
       if (fileNode == null) {
@@ -317,7 +322,7 @@ class FSDirectory implements FSConstants {
         throw new IOException("Getting block size of a directory: " + 
                               filename);
       }
-      return ((INodeFile)fileNode).getBlockSize();
+      return ((INodeFile)fileNode).getPreferredBlockSize();
     }
   }
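
Reviewer note: getPreferredBlockSize now has an explicit error contract: IOException for both a missing path and a directory. A compact sketch of that contract, with a toy map standing in for the real inode tree:

    import java.io.IOException;
    import java.util.HashMap;
    import java.util.Map;

    // Sketch only: a toy namespace in place of FSDirectory's rootDir.
    final class LookupContractDemo {
      // path -> preferred blocksize; directories are marked with null.
      private final Map<String, Long> namespace = new HashMap<>();

      long getPreferredBlockSize(String filename) throws IOException {
        if (!namespace.containsKey(filename)) {
          throw new IOException("File does not exist: " + filename);
        }
        Long size = namespace.get(filename);
        if (size == null) {
          throw new IOException("Getting block size of a directory: " + filename);
        }
        return size;
      }

      public static void main(String[] args) throws IOException {
        LookupContractDemo demo = new LookupContractDemo();
        demo.namespace.put("/user/dhruba/data.txt", 64L << 20);
        demo.namespace.put("/user/dhruba", null);
        System.out.println(demo.getPreferredBlockSize("/user/dhruba/data.txt"));
      }
    }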
     

+ 34 - 13
src/java/org/apache/hadoop/dfs/FSEditLog.java

@@ -233,6 +233,7 @@ class FSEditLog {
         while (true) {
           long timestamp = 0;
           long mtime = 0;
+          long blockSize = 0;
           byte opcode = -1;
           try {
             opcode = in.readByte();
@@ -253,10 +254,13 @@ class FSEditLog {
               aw = new ArrayWritable(UTF8.class);
               aw.readFields(in);
               writables = aw.get(); 
-              if (logVersion >= -4 && writables.length != 2 ||
-                  logVersion < -4 && writables.length != 3) {
-                  throw new IOException("Incorrect data fortmat. " 
-                                        + "Name & replication pair expected");
+              if (-4 <= logVersion && writables.length != 2 ||
+                  -7 <= logVersion && logVersion < -4 && writables.length != 3 ||
+                  logVersion < -7 && writables.length != 4) {
+                  throw new IOException("Incorrect data format." +
+                                        " logVersion is " + logVersion +
+                                        " but writables.length is " +
+                                        writables.length + ". ");
               }
               name = (UTF8) writables[0];
               replication = Short.parseShort(
@@ -265,6 +269,9 @@ class FSEditLog {
               if (logVersion < -4) {
                 mtime = Long.parseLong(((UTF8)writables[2]).toString());
               }
+              if (logVersion < -7) {
+                blockSize = Long.parseLong(((UTF8)writables[3]).toString());
+              }
             }
             // get blocks
             aw = new ArrayWritable(Block.class);
@@ -272,8 +279,21 @@ class FSEditLog {
             writables = aw.get();
             Block blocks[] = new Block[writables.length];
             System.arraycopy(writables, 0, blocks, 0, blocks.length);
+
+            // Older versions of HDFS do not store the block size in the
+            // inode. If the file has more than one block, use the size of
+            // the first block as the blocksize; otherwise leave blockSize
+            // as 0 to indicate that we do not really know the "true"
+            // blocksize of this file.
+            if (-7 <= logVersion) {
+              assert blockSize == 0;
+              if (blocks.length > 1) {
+                blockSize = blocks[0].getNumBytes();
+              }
+            }
             // add to the file tree
-            fsDir.unprotectedAddFile(name.toString(), blocks, replication, mtime);
+            fsDir.unprotectedAddFile(name.toString(), blocks, replication, 
+                                     mtime, blockSize);
             break;
           }
           case OP_SET_REPLICATION: {
@@ -302,7 +322,7 @@ class FSEditLog {
               aw.readFields(in);
               writables = aw.get(); 
               if (writables.length != 3) {
-                throw new IOException("Incorrect data fortmat. " 
+                throw new IOException("Incorrect data format. " 
                                       + "Mkdir operation.");
               }
               src = (UTF8) writables[0];
@@ -324,7 +344,7 @@ class FSEditLog {
               aw.readFields(in);
               writables = aw.get(); 
               if (writables.length != 2) {
-                throw new IOException("Incorrect data fortmat. " 
+                throw new IOException("Incorrect data format. " 
                                       + "delete operation.");
               }
               src = (UTF8) writables[0];
@@ -345,7 +365,7 @@ class FSEditLog {
               aw.readFields(in);
               writables = aw.get(); 
               if (writables.length != 2) {
-                throw new IOException("Incorrect data fortmat. " 
+                throw new IOException("Incorrect data format. " 
                                       + "Mkdir operation.");
               }
               src = (UTF8) writables[0];
@@ -480,7 +500,8 @@ class FSEditLog {
     UTF8 nameReplicationPair[] = new UTF8[] { 
       new UTF8(path), 
       FSEditLog.toLogReplication(newNode.getReplication()),
-      FSEditLog.toLogTimeStamp(newNode.getModificationTime())};
+      FSEditLog.toLogLong(newNode.getModificationTime()),
+      FSEditLog.toLogLong(newNode.getPreferredBlockSize())};
     logEdit(OP_ADD,
             new ArrayWritable(UTF8.class, nameReplicationPair), 
             new ArrayWritable(Block.class, newNode.getBlocks()));
@@ -492,7 +513,7 @@ class FSEditLog {
   void logMkDir(String path, INode newNode) {
     UTF8 info[] = new UTF8[] {
       new UTF8(path),
-      FSEditLog.toLogTimeStamp(newNode.getModificationTime())
+      FSEditLog.toLogLong(newNode.getModificationTime())
     };
     logEdit(OP_MKDIR, new ArrayWritable(UTF8.class, info), null);
   }
@@ -505,7 +526,7 @@ class FSEditLog {
     UTF8 info[] = new UTF8[] { 
       new UTF8(src),
       new UTF8(dst),
-      FSEditLog.toLogTimeStamp(timestamp)};
+      FSEditLog.toLogLong(timestamp)};
     logEdit(OP_RENAME, new ArrayWritable(UTF8.class, info), null);
   }
   
@@ -524,7 +545,7 @@ class FSEditLog {
   void logDelete(String src, long timestamp) {
     UTF8 info[] = new UTF8[] { 
       new UTF8(src),
-      FSEditLog.toLogTimeStamp(timestamp)};
+      FSEditLog.toLogLong(timestamp)};
     logEdit(OP_DELETE, new ArrayWritable(UTF8.class, info), null);
   }
   
@@ -552,7 +573,7 @@ class FSEditLog {
     return Short.parseShort(replication.toString());
   }
 
-  static UTF8 toLogTimeStamp(long timestamp) {
+  static UTF8 toLogLong(long timestamp) {
     return new UTF8(Long.toString(timestamp));
   }
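
Reviewer note: an OP_ADD record now carries four UTF8 fields, and the loader above accepts all three historical layouts. A sketch summarizing its writables.length checks:

    // Sketch only: expected OP_ADD UTF8 field counts per edit-log version.
    final class OpAddLayoutDemo {
      static int expectedOpAddFields(int logVersion) {
        if (logVersion >= -4) return 2; // {name, replication}
        if (logVersion >= -7) return 3; // {name, replication, mtime}
        return 4;                       // {name, replication, mtime, blocksize}
      }

      public static void main(String[] args) {
        System.out.println(expectedOpAddFields(-4)); // 2
        System.out.println(expectedOpAddFields(-7)); // 3
        System.out.println(expectedOpAddFields(-8)); // 4
      }
    }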
 

+ 18 - 1
src/java/org/apache/hadoop/dfs/FSImage.java

@@ -669,6 +669,7 @@ class FSImage extends Storage {
       for (int i = 0; i < numFiles; i++) {
         UTF8 name = new UTF8();
         long modificationTime = 0;
+        long blockSize = 0;
         name.readFields(in);
         // version 0 does not support per file replication
         if (!(imgVersion >= 0)) {
@@ -678,6 +679,9 @@ class FSImage extends Storage {
         if (imgVersion <= -5) {
           modificationTime = in.readLong();
         }
+        if (imgVersion <= -8) {
+          blockSize = in.readLong();
+        }
         int numBlocks = in.readInt();
         Block blocks[] = null;
         if (numBlocks > 0) {
@@ -687,8 +691,19 @@ class FSImage extends Storage {
             blocks[j].readFields(in);
           }
         }
+        // Older versions of HDFS do not store the block size in the inode.
+        // If the file has more than one block, use the size of the
+        // first block as the blocksize; otherwise leave blockSize as 0
+        // to indicate that we do not really know the "true" blocksize
+        // of this file.
+        if (-7 <= imgVersion) {
+          assert blockSize == 0;
+          if (numBlocks > 1) {
+            blockSize = blocks[0].getNumBytes();
+          }
+        }
         fsDir.unprotectedAddFile(name.toString(), blocks, replication,
-                                 modificationTime);
+                                 modificationTime, blockSize);
       }
       
       // load datanode info
@@ -814,6 +829,7 @@ class FSImage extends Storage {
         INodeFile fileINode = (INodeFile)inode;
         out.writeShort(fileINode.getReplication());
         out.writeLong(inode.getModificationTime());
+        out.writeLong(fileINode.getPreferredBlockSize());
         Block[] blocks = fileINode.getBlocks();
         out.writeInt(blocks.length);
         for (Block blk : blocks)
@@ -823,6 +839,7 @@ class FSImage extends Storage {
       // write directory inode
       out.writeShort(0);  // replication
       out.writeLong(inode.getModificationTime());
+      out.writeLong(0);   // preferred block size
       out.writeInt(0);    // # of blocks
     }
     for(INode child : ((INodeDirectory)inode).getChildren()) {
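
Reviewer note: after this patch a file inode in the image is serialized as replication (short), modification time (long), preferred blocksize (long, new in version -8), then the block count and block list. A hedged round-trip sketch of just the fixed-width prefix:

    import java.io.*;

    // Sketch only: the per-inode record prefix implied by the hunk above.
    final class ImageRecordDemo {
      public static void main(String[] args) throws IOException {
        ByteArrayOutputStream buf = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(buf);
        out.writeShort(3);             // replication
        out.writeLong(1188000000000L); // modification time (illustrative)
        out.writeLong(64L << 20);      // preferred blocksize: new in -8
        out.writeInt(0);               // number of blocks

        DataInputStream in = new DataInputStream(
            new ByteArrayInputStream(buf.toByteArray()));
        System.out.println("replication = " + in.readShort());
        System.out.println("mtime       = " + in.readLong());
        System.out.println("blockSize   = " + in.readLong());
        System.out.println("numBlocks   = " + in.readInt());
      }
    }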

+ 3 - 3
src/java/org/apache/hadoop/dfs/FSNamesystem.java

@@ -673,8 +673,8 @@ class FSNamesystem implements FSConstants {
     return true;
   }
     
-  public long getBlockSize(String filename) throws IOException {
-    return dir.getBlockSize(filename);
+  public long getPreferredBlockSize(String filename) throws IOException {
+    return dir.getPreferredBlockSize(filename);
   }
     
   /**
@@ -824,7 +824,7 @@ class FSNamesystem implements FSConstants {
     // Now we can add the name to the filesystem. This file has no
     // blocks associated with it.
     //
-    if (!dir.addFile(src, new Block[0], replication)) {
+    if (!dir.addFile(src, new Block[0], replication, blockSize)) {
       throw new IOException("DIR* NameSystem.startFile: " +
                             "Unable to add file to namespace.");
     }

+ 7 - 8
src/java/org/apache/hadoop/dfs/INode.java

@@ -404,13 +404,16 @@ class INodeDirectory extends INode {
 class INodeFile extends INode {
   private Block blocks[] = null;
   protected short blockReplication;
+  protected long preferredBlockSize;
 
   /**
    */
-  INodeFile(Block blocks[], short replication, long modificationTime) {
+  INodeFile(Block blocks[], short replication, long modificationTime,
+            long preferredBlockSize) {
     super(modificationTime);
     this.blocks = blocks;
     this.blockReplication = replication;
+    this.preferredBlockSize = preferredBlockSize;
   }
 
   boolean isDirectory() {
@@ -464,14 +467,10 @@ class INodeFile extends INode {
   }
 
   /**
-   * Get the block size of the first block
+   * Get the preferred block size of the file.
    * @return the number of bytes
    */
-  long getBlockSize() {
-    if (blocks == null || blocks.length == 0) {
-      return 0;
-    } else {
-      return blocks[0].getNumBytes();
-    }
+  long getPreferredBlockSize() {
+    return preferredBlockSize;
   }
 }
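
Reviewer note: the loaders in FSEditLog and FSImage only trust blocks[0] when a file has more than one block, because every block except the last is guaranteed to be full, while a lone block may be partial. A one-method sketch of that inference rule:

    // Sketch only: the legacy-inference rule shared by the two loaders.
    final class LegacyBlockSizeDemo {
      static long inferLegacyBlockSize(long[] blockLengths) {
        // Only a non-final block is known to be exactly one blocksize long,
        // so a single (possibly partial) block tells us nothing; 0 = unknown.
        return (blockLengths.length > 1) ? blockLengths[0] : 0L;
      }

      public static void main(String[] args) {
        System.out.println(inferLegacyBlockSize(new long[] { 1048576L }));            // 0
        System.out.println(inferLegacyBlockSize(new long[] { 67108864L, 1048576L })); // 67108864
      }
    }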

+ 2 - 2
src/java/org/apache/hadoop/dfs/NameNode.java

@@ -373,8 +373,8 @@ public class NameNode implements ClientProtocol, DatanodeProtocol, FSConstants {
     }
   }
 
-  public long getBlockSize(String filename) throws IOException {
-    return namesystem.getBlockSize(filename);
+  public long getPreferredBlockSize(String filename) throws IOException {
+    return namesystem.getPreferredBlockSize(filename);
   }
     
   /**

+ 19 - 3
src/test/org/apache/hadoop/dfs/TestFileStatus.java

@@ -40,7 +40,8 @@ public class TestFileStatus extends TestCase {
     new Path(System.getProperty("test.build.data","/tmp"))
     .toString().replace(' ', '+');
   
-  private void writeFile(FileSystem fileSys, Path name, int repl)
+  private void writeFile(FileSystem fileSys, Path name, int repl,
+                         int fileSize, int blockSize)
     throws IOException {
     // create and write a file that contains three blocks of data
     FSDataOutputStream stm = fileSys.create(name, true,
@@ -94,12 +95,14 @@ public class TestFileStatus extends TestCase {
       // create a file in home directory
       //
       Path file1 = new Path("filestatus.dat");
-      writeFile(fs, file1, 1);
+      writeFile(fs, file1, 1, fileSize, blockSize);
       System.out.println("Created file filestatus.dat with one "
                          + " replicas.");
       checkFile(fs, file1, 1);
       assertTrue(file1 + " should be a file", 
                   fs.getFileStatus(file1).isDir() == false);
+      assertTrue(fs.getFileStatus(file1).getBlockSize() == blockSize);
+      assertTrue(fs.getFileStatus(file1).getReplication() == 1);
       System.out.println("Path : \"" + file1 + "\"");
 
       // create a directory
@@ -110,7 +113,20 @@ public class TestFileStatus extends TestCase {
       assertTrue(dir + " should be a directory", 
                  fs.getFileStatus(path).isDir() == true);
       System.out.println("Dir : \"" + dir + "\"");
-    
+
+      // create another file that is smaller than a block.
+      //
+      Path file2 = new Path("filestatus2.dat");
+      writeFile(fs, file2, 1, blockSize/4, blockSize);
+      System.out.println("Created file filestatus2.dat with one "
+                         + " replicas.");
+      checkFile(fs, file2, 1);
+      System.out.println("Path : \"" + file2 + "\"");
+
+      // verify file attributes
+      assertTrue(fs.getFileStatus(file2).getBlockSize() == blockSize);
+      assertTrue(fs.getFileStatus(file2).getReplication() == 1);
+
     } finally {
       fs.close();
       cluster.shutdown();