
HDFS-16533. COMPOSITE_CRC failed between replicated file and striped file due to invalid requested length. (#4155)

Co-authored-by: zengqiang.xu <zengqiang.xu@shopee.com>
Author: xuzq
Commit: 01a2e0f6bd
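
The failure mode, in short: when the requested checksum length ends before the
file's final block, the last digested chunk was sized from
blockLocations.getLastLocatedBlock(), i.e. the file's final (possibly short)
block, rather than from the block the request actually ends in. A minimal,
self-contained sketch of the arithmetic, with made-up values and names (not
code from the commit):

    // Illustrative arithmetic only; the variable names are invented for this sketch.
    public class CompositeCrcLengthSketch {
      public static void main(String[] args) {
        long blockSize = 128L * 1024 * 1024;                 // 128 MiB block, for illustration
        long fileLastBlockSize = (long) (blockSize * 0.5);   // the file ends half-way into its final block
        long remainingRequested = (long) (blockSize * 0.6);  // the request ends 0.6 of the way into an earlier, full block
        long requestEndBlockSize = blockSize;                // size of the block the request actually ends in

        // Old behaviour: clamp against the file's final block size, so the
        // 0.6 * blockSize tail of the request collapses to 0.5 * blockSize.
        long before = Math.min(remainingRequested, fileLastBlockSize);

        // Patched behaviour: clamp against the block the request ends in,
        // keeping the full 0.6 * blockSize tail.
        long after = Math.min(remainingRequested, requestEndBlockSize);

        System.out.println("before = " + before + ", after = " + after);
      }
    }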

+ 4 - 14
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/FileChecksumHelper.java

@@ -303,7 +303,8 @@ final class FileChecksumHelper {
       byte[] blockChecksumBytes = blockChecksumBuf.getData();
 
       long sumBlockLengths = 0;
-      for (int i = 0; i < locatedBlocks.size() - 1; ++i) {
+      int i = 0;
+      for (; i < locatedBlocks.size() - 1; ++i) {
         LocatedBlock block = locatedBlocks.get(i);
         // For everything except the last LocatedBlock, we expect getBlockSize()
         // to accurately reflect the number of file bytes digested in the block
@@ -316,19 +317,8 @@ final class FileChecksumHelper {
             "Added blockCrc 0x{} for block index {} of size {}",
             Integer.toString(blockCrc, 16), i, block.getBlockSize());
       }
-
-      // NB: In some cases the located blocks have their block size adjusted
-      // explicitly based on the requested length, but not all cases;
-      // these numbers may or may not reflect actual sizes on disk.
-      long reportedLastBlockSize =
-          blockLocations.getLastLocatedBlock().getBlockSize();
-      long consumedLastBlockLength = reportedLastBlockSize;
-      if (length - sumBlockLengths < reportedLastBlockSize) {
-        LOG.warn(
-            "Last block length {} is less than reportedLastBlockSize {}",
-            length - sumBlockLengths, reportedLastBlockSize);
-        consumedLastBlockLength = length - sumBlockLengths;
-      }
+      LocatedBlock nextBlock = locatedBlocks.get(i);
+      long consumedLastBlockLength = Math.min(length - sumBlockLengths, nextBlock.getBlockSize());
       // NB: blockChecksumBytes.length may be much longer than actual bytes
       // written into the DataOutput.
       int lastBlockCrc = CrcUtil.readInt(

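The patched loop above keeps the index of the block the requested length ends
in and clamps its contribution with Math.min, instead of consulting the file's
last located block. A standalone sketch of that control flow, assuming a plain
list containing the sizes of exactly the blocks the request covers (names and
values are illustrative, not the HDFS types):

    import java.util.List;

    public class LastBlockChunkSketch {
      static long consumedLastBlockLength(List<Long> coveredBlockSizes, long requestedLength) {
        long sumBlockLengths = 0;
        int i = 0;
        // Every block except the last covered one contributes its full size.
        for (; i < coveredBlockSizes.size() - 1; ++i) {
          sumBlockLengths += coveredBlockSizes.get(i);
        }
        // The final chunk is bounded both by the bytes the request still needs
        // and by the size of the block the request ends in.
        return Math.min(requestedLength - sumBlockLengths, coveredBlockSizes.get(i));
      }

      public static void main(String[] args) {
        long blockSize = 64L * 1024 * 1024;
        // The request covers two full blocks and 0.6 of a third, full block.
        long requested = 2 * blockSize + (long) (blockSize * 0.6);
        System.out.println(consumedLastBlockLength(
            List.of(blockSize, blockSize, blockSize), requested));
      }
    }
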
+ 33 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileChecksum.java

@@ -215,6 +215,39 @@ public class TestFileChecksum {
     }
   }
 
+  /**
+   * Test a corner case of COMPOSITE_CRC.
+   * For the striped file, the last block size in the file is (int)(blockSize * 0.5),
+   *    but the last block size in the check length is (int)(blockSize * 0.6).
+   * For the replicated file, the last block size in the file is (int)(blockSize * 0.5),
+   *    but the last block size in the check length is ((dataBlocks - 1) * blockSize
+   *    + (int) (blockSize * 0.6))
+   */
+  @Test(timeout = 90000)
+  public void testStripedAndReplicatedFileChecksum2() throws Exception {
+    final int lastBlockSize = (int) (blockSize * 0.5);
+    final int fullStripeLength = dataBlocks * blockSize;
+    final int testFileSize = fullStripeLength + lastBlockSize;
+    prepareTestFiles(testFileSize, new String[] {stripedFile1, replicatedFile});
+
+    final int specialLength = (dataBlocks - 1) * blockSize
+        + (int) (blockSize * 0.6);
+
+    Assert.assertTrue(specialLength % blockSize > lastBlockSize);
+    Assert.assertTrue(specialLength % fullStripeLength > lastBlockSize);
+
+    FileChecksum stripedFileChecksum = getFileChecksum(stripedFile1,
+        specialLength, false);
+    FileChecksum replicatedFileChecksum = getFileChecksum(replicatedFile,
+        specialLength, false);
+
+    if (checksumCombineMode.equals(ChecksumCombineMode.COMPOSITE_CRC.name())) {
+      Assert.assertEquals(replicatedFileChecksum, stripedFileChecksum);
+    } else {
+      Assert.assertNotEquals(replicatedFileChecksum, stripedFileChecksum);
+    }
+  }
+
   @Test(timeout = 90000)
   public void testDifferentBlockSizeReplicatedFileChecksum() throws Exception {
     byte[] fileData = StripedFileTestUtil.generateBytes(fileSize);