HDFS-9705. Refine the behaviour of getFileChecksum when length = 0. Contributed by Kai Zheng and SammiChen.

Andrew Wang, 8 years ago
Parent commit: cc1292e73a

+ 7 - 3
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java

@@ -1731,10 +1731,14 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory,
     checkOpen();
     Preconditions.checkArgument(length >= 0);
 
-    LocatedBlocks blockLocations = getBlockLocations(src, length);
+    LocatedBlocks blockLocations = null;
+    FileChecksumHelper.FileChecksumComputer maker = null;
+    ErasureCodingPolicy ecPolicy = null;
+    if (length > 0) {
+      blockLocations = getBlockLocations(src, length);
+      ecPolicy = blockLocations.getErasureCodingPolicy();
+    }
 
-    FileChecksumHelper.FileChecksumComputer maker;
-    ErasureCodingPolicy ecPolicy = blockLocations.getErasureCodingPolicy();
     maker = ecPolicy != null ?
         new FileChecksumHelper.StripedFileNonStripedChecksumComputer(src,
             length, blockLocations, namenode, this, ecPolicy) :
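
With length == 0 the client now skips the getBlockLocations call entirely, so no
block locations are fetched before the checksum computer runs. A minimal
caller-side sketch of the resulting behavior (paths are hypothetical;
getFileChecksum(Path, long) is the public FileSystem API that reaches this code
path):

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileChecksum;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;

  public class ZeroLengthChecksumDemo {
    public static void main(String[] args) throws Exception {
      FileSystem fs = FileSystem.get(new Configuration());
      // Length 0: returns the magic empty checksum without reading any block.
      FileChecksum zeroLen = fs.getFileChecksum(new Path("/tmp/data"), 0);
      // A genuinely empty file yields the same checksum.
      FileChecksum empty = fs.getFileChecksum(new Path("/tmp/empty"));
      System.out.println(zeroLen.equals(empty)); // prints true
    }
  }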

+ 25 - 17
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/FileChecksumHelper.java

@@ -95,11 +95,13 @@ final class FileChecksumHelper {
       this.client = client;
 
       this.remaining = length;
-      if (src.contains(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR_SEPARATOR)) {
-        this.remaining = Math.min(length, blockLocations.getFileLength());
-      }
 
-      this.locatedBlocks = blockLocations.getLocatedBlocks();
+      if (blockLocations != null) {
+        if (src.contains(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR_SEPARATOR)) {
+          this.remaining = Math.min(length, blockLocations.getFileLength());
+        }
+        this.locatedBlocks = blockLocations.getLocatedBlocks();
+      }
     }
 
     String getSrc() {
@@ -203,9 +205,23 @@ final class FileChecksumHelper {
      * @throws IOException
      */
     void compute() throws IOException {
-      checksumBlocks();
-
-      fileChecksum = makeFinalResult();
+      /*
+       * If the request length is 0 or the file is empty, return a checksum
+       * with the magic entry that matches what previous HDFS versions return.
+       */
+      if (locatedBlocks == null || locatedBlocks.isEmpty()) {
+        // Explicitly specified here in case the default DataOutputBuffer
+        // buffer length value is changed in the future. This matters because
+        // the fixed value 32 has to be used to reproduce the magic value
+        // returned by previous HDFS versions.
+        final int lenOfZeroBytes = 32;
+        byte[] emptyBlockMd5 = new byte[lenOfZeroBytes];
+        MD5Hash fileMD5 = MD5Hash.digest(emptyBlockMd5);
+        fileChecksum = new MD5MD5CRC32GzipFileChecksum(0, 0, fileMD5);
+      } else {
+        checksumBlocks();
+        fileChecksum = makeFinalResult();
+      }
     }
 
     /**
@@ -228,15 +244,7 @@ final class FileChecksumHelper {
         return new MD5MD5CRC32CastagnoliFileChecksum(bytesPerCRC,
             crcPerBlock, fileMD5);
       default:
-        // If there is no block allocated for the file,
-        // return one with the magic entry that matches what previous
-        // hdfs versions return.
-        if (locatedBlocks.isEmpty()) {
-          return new MD5MD5CRC32GzipFileChecksum(0, 0, fileMD5);
-        }
-
-        // we should never get here since the validity was checked
-        // when getCrcType() was called above.
+        // we will get here when crcType is "NULL".
         return null;
       }
     }
@@ -412,7 +420,7 @@ final class FileChecksumHelper {
   }
 
   /**
-   * Striped file checksum computing.
+   * Non-striped checksum computing for striped files.
    */
   static class StripedFileNonStripedChecksumComputer
       extends FileChecksumComputer {
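
The magic checksum produced above is simply the MD5 of 32 zero bytes, wrapped
as an MD5MD5CRC32 checksum with bytesPerCRC = 0 and crcPerBlock = 0. A
standalone sketch of the derivation (using plain java.security.MessageDigest
rather than Hadoop's MD5Hash):

  import java.security.MessageDigest;

  public class MagicChecksumDemo {
    public static void main(String[] args) throws Exception {
      // 32 zero bytes: the default DataOutputBuffer size that older
      // clients ended up hashing for empty files.
      byte[] zeros = new byte[32];
      byte[] md5 = MessageDigest.getInstance("MD5").digest(zeros);
      StringBuilder s = new StringBuilder("MD5-of-0MD5-of-0CRC32:");
      for (byte b : md5) {
        s.append(String.format("%02x", b));
      }
      // Prints the magic value asserted in the test below:
      // MD5-of-0MD5-of-0CRC32:70bc8f4b72a86921468bf8e8441dce51
      System.out.println(s);
    }
  }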

+ 3 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockChecksumHelper.java

@@ -66,7 +66,7 @@ final class BlockChecksumHelper {
   }
 
   /**
-   * The abstract base block checksum computer.
+   * The abstract block checksum computer.
    */
   static abstract class AbstractBlockChecksumComputer {
     private final DataNode datanode;
@@ -139,7 +139,7 @@ final class BlockChecksumHelper {
   }
 
   /**
-   * The abstract base block checksum computer.
+   * The abstract base block checksum computer, mainly for replicated blocks.
    */
   static abstract class BlockChecksumComputer
       extends AbstractBlockChecksumComputer {
@@ -534,4 +534,4 @@ final class BlockChecksumHelper {
       }
     }
   }
-}
+}

+ 13 - 9
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java

@@ -1030,13 +1030,6 @@ public class TestDistributedFileSystem {
         out.close();
       }
 
-      // verify the magic val for zero byte files
-      {
-        final FileChecksum zeroChecksum = hdfs.getFileChecksum(zeroByteFile);
-        assertEquals(zeroChecksum.toString(),
-            "MD5-of-0MD5-of-0CRC32:70bc8f4b72a86921468bf8e8441dce51");
-      }
-
       //write another file
       final Path bar = new Path(dir, "bar" + n);
       {
@@ -1045,8 +1038,19 @@ public class TestDistributedFileSystem {
         out.write(data);
         out.close();
       }
-  
-      { //verify checksum
+
+      {
+        final FileChecksum zeroChecksum = hdfs.getFileChecksum(zeroByteFile);
+        final String magicValue =
+            "MD5-of-0MD5-of-0CRC32:70bc8f4b72a86921468bf8e8441dce51";
+        // verify the magic value for zero-byte files
+        assertEquals(magicValue, zeroChecksum.toString());
+
+        // verify a request length of 0 yields the empty-file magic checksum
+        final FileChecksum checksumWith0 = hdfs.getFileChecksum(bar, 0);
+        assertEquals(zeroChecksum, checksumWith0);
+
+        //verify checksum
         final FileChecksum barcs = hdfs.getFileChecksum(bar);
         final int barhashcode = barcs.hashCode();
         assertEquals(hdfsfoocs.hashCode(), barhashcode);
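
The assertEquals(zeroChecksum, checksumWith0) check above relies on
FileChecksum.equals, which compares the algorithm name and the serialized
checksum bytes. A small sketch constructing the magic checksum directly
(classes from hadoop-common; this mirrors what compute() now does for empty
requests):

  import org.apache.hadoop.fs.FileChecksum;
  import org.apache.hadoop.fs.MD5MD5CRC32GzipFileChecksum;
  import org.apache.hadoop.io.MD5Hash;

  public class ChecksumEqualityDemo {
    public static void main(String[] args) {
      MD5Hash fileMD5 = MD5Hash.digest(new byte[32]);
      FileChecksum a = new MD5MD5CRC32GzipFileChecksum(0, 0, fileMD5);
      FileChecksum b = new MD5MD5CRC32GzipFileChecksum(0, 0, fileMD5);
      // equals() compares getAlgorithmName() and getBytes(), so two
      // independently built magic checksums compare equal.
      System.out.println(a.equals(b)); // true
      System.out.println(a);
      // MD5-of-0MD5-of-0CRC32:70bc8f4b72a86921468bf8e8441dce51
    }
  }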