
HDFS-6561. org.apache.hadoop.util.DataChecksum should support native checksumming (James Thomas via Colin Patrick McCabe)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1618680 13f79535-47bb-0310-9956-ffa450edef68
Colin McCabe · 10 years ago · commit e446497cd1
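With this change, DataChecksum transparently dispatches bulk checksum calculation to native code (via NativeCrc32) whenever the native hadoop library is loaded; previously only verification had a native path. A minimal usage sketch — the class name and buffer sizes are illustrative, but the DataChecksum calls are the real API exercised by the tests in this commit:

    import java.nio.ByteBuffer;
    import org.apache.hadoop.util.DataChecksum;

    public class ChecksumExample {
      public static void main(String[] args) throws Exception {
        // CRC32C with 512-byte chunks, the common HDFS configuration.
        DataChecksum sum =
            DataChecksum.newDataChecksum(DataChecksum.Type.CRC32C, 512);
        ByteBuffer data = ByteBuffer.allocateDirect(4096);
        ByteBuffer sums = ByteBuffer.allocateDirect(
            (4096 / 512) * sum.getChecksumSize());  // one 32-bit CRC per chunk
        sum.calculateChunkedSums(data, sums);       // native path when available
        sum.verifyChunkedSums(data, sums, "fake file", 0); // throws ChecksumException on mismatch
      }
    }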

+ 3 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -527,6 +527,9 @@ Release 2.6.0 - UNRELEASED
     HADOOP-10335. An ip whilelist based implementation to resolve Sasl
     properties per connection. (Benoy Antony via Arpit Agarwal)
 
+    HDFS-6561. org.apache.hadoop.util.DataChecksum should support native
+    checksumming (James Thomas via Colin Patrick McCabe)
+
   OPTIMIZATIONS
 
     HADOOP-10838. Byte array native checksumming. (James Thomas via todd)

+ 13 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java

@@ -390,6 +390,12 @@ public class DataChecksum implements Checksum {
           checksums.array(), checksums.arrayOffset() + checksums.position());
       return;
     }
+
+    if (NativeCrc32.isAvailable()) {
+      NativeCrc32.calculateChunkedSums(bytesPerChecksum, type.id,
+          checksums, data);
+      return;
+    }
     
     data.mark();
     checksums.mark();
@@ -412,10 +418,16 @@ public class DataChecksum implements Checksum {
    * Implementation of chunked calculation specifically on byte arrays. This
    * is to avoid the copy when dealing with ByteBuffers that have array backing.
    */
-  private void calculateChunkedSums(
+  public void calculateChunkedSums(
       byte[] data, int dataOffset, int dataLength,
       byte[] sums, int sumsOffset) {
 
+    if (NativeCrc32.isAvailable()) {
+      NativeCrc32.calculateChunkedSumsByteArray(bytesPerChecksum, type.id,
+          sums, sumsOffset, data, dataOffset, dataLength);
+      return;
+    }
+
     int remaining = dataLength;
     while (remaining > 0) {
       int n = Math.min(remaining, bytesPerChecksum);
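Note that calculateChunkedSums(byte[], int, int, byte[], int) is widened from private to public above, so callers holding plain arrays can reach the native byte-array path without wrapping anything in a ByteBuffer. A hedged sketch of direct use (class name and sizes are illustrative):

    import org.apache.hadoop.util.DataChecksum;

    public class ArraySumsExample {
      public static void main(String[] args) {
        DataChecksum sum =
            DataChecksum.newDataChecksum(DataChecksum.Type.CRC32C, 512);
        byte[] data = new byte[4096];
        byte[] sums = new byte[(4096 / 512) * sum.getChecksumSize()];
        // Newly-public overload: dispatches to
        // NativeCrc32.calculateChunkedSumsByteArray() when the native
        // library is loaded, else falls back to the pure-Java chunk loop.
        sum.calculateChunkedSums(data, 0, data.length, sums, 0);
      }
    }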

+ 25 - 8
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCrc32.java

@@ -54,33 +54,50 @@ class NativeCrc32 {
   public static void verifyChunkedSums(int bytesPerSum, int checksumType,
       ByteBuffer sums, ByteBuffer data, String fileName, long basePos)
       throws ChecksumException {
-    nativeVerifyChunkedSums(bytesPerSum, checksumType,
+    nativeComputeChunkedSums(bytesPerSum, checksumType,
         sums, sums.position(),
         data, data.position(), data.remaining(),
-        fileName, basePos);
+        fileName, basePos, true);
   }
 
   public static void verifyChunkedSumsByteArray(int bytesPerSum,
       int checksumType, byte[] sums, int sumsOffset, byte[] data,
       int dataOffset, int dataLength, String fileName, long basePos)
       throws ChecksumException {
-    nativeVerifyChunkedSumsByteArray(bytesPerSum, checksumType,
+    nativeComputeChunkedSumsByteArray(bytesPerSum, checksumType,
         sums, sumsOffset,
         data, dataOffset, dataLength,
-        fileName, basePos);
+        fileName, basePos, true);
+  }
+
+  public static void calculateChunkedSums(int bytesPerSum, int checksumType,
+      ByteBuffer sums, ByteBuffer data) {
+    nativeComputeChunkedSums(bytesPerSum, checksumType,
+        sums, sums.position(),
+        data, data.position(), data.remaining(),
+        "", 0, false);
+  }
+
+  public static void calculateChunkedSumsByteArray(int bytesPerSum,
+      int checksumType, byte[] sums, int sumsOffset, byte[] data,
+      int dataOffset, int dataLength) {
+    nativeComputeChunkedSumsByteArray(bytesPerSum, checksumType,
+        sums, sumsOffset,
+        data, dataOffset, dataLength,
+        "", 0, false);
   }
   
-    private static native void nativeVerifyChunkedSums(
+    private static native void nativeComputeChunkedSums(
       int bytesPerSum, int checksumType,
       ByteBuffer sums, int sumsOffset,
       ByteBuffer data, int dataOffset, int dataLength,
-      String fileName, long basePos);
+      String fileName, long basePos, boolean verify);
 
-    private static native void nativeVerifyChunkedSumsByteArray(
+    private static native void nativeComputeChunkedSumsByteArray(
       int bytesPerSum, int checksumType,
       byte[] sums, int sumsOffset,
       byte[] data, int dataOffset, int dataLength,
-      String fileName, long basePos);
+      String fileName, long basePos, boolean verify);
 
   // Copy the constants over from DataChecksum so that javah will pick them up
   // and make them available in the native code header.
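The Java wrapper now funnels both verification and calculation through a single pair of native methods, selected by the trailing verify flag; the calculate entry points pass "" and 0 for fileName/basePos because no ChecksumException can arise in calculate mode. A round-trip sketch (hypothetical class, placed in org.apache.hadoop.util since NativeCrc32 is package-private, and assuming Type.id is accessible as in the diff):

    package org.apache.hadoop.util;

    import java.nio.ByteBuffer;

    public class NativeCrc32Example {
      public static void main(String[] args) throws Exception {
        if (!NativeCrc32.isAvailable()) {
          System.err.println("native hadoop library not loaded");
          return;
        }
        int bytesPerSum = 512;
        int crc32c = DataChecksum.Type.CRC32C.id;
        ByteBuffer data = ByteBuffer.allocateDirect(2 * bytesPerSum);
        ByteBuffer sums = ByteBuffer.allocateDirect(2 * 4); // two 32-bit CRCs
        NativeCrc32.calculateChunkedSums(bytesPerSum, crc32c, sums, data);
        // Verifying what we just computed should succeed silently.
        NativeCrc32.verifyChunkedSums(bytesPerSum, crc32c, sums, data, "buf", 0);
      }
    }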

+ 15 - 15
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/NativeCrc32.c

@@ -117,12 +117,12 @@ static int convert_java_crc_type(JNIEnv *env, jint crc_type) {
   }
 }
 
-JNIEXPORT void JNICALL Java_org_apache_hadoop_util_NativeCrc32_nativeVerifyChunkedSums
+JNIEXPORT void JNICALL Java_org_apache_hadoop_util_NativeCrc32_nativeComputeChunkedSums
   (JNIEnv *env, jclass clazz,
     jint bytes_per_checksum, jint j_crc_type,
     jobject j_sums, jint sums_offset,
     jobject j_data, jint data_offset, jint data_len,
-    jstring j_filename, jlong base_pos)
+    jstring j_filename, jlong base_pos, jboolean verify)
 {
   uint8_t *sums_addr;
   uint8_t *data_addr;
@@ -166,27 +166,27 @@ JNIEXPORT void JNICALL Java_org_apache_hadoop_util_NativeCrc32_nativeVerifyChunk
   if (crc_type == -1) return; // exception already thrown
 
   // Setup complete. Actually verify checksums.
-  ret = bulk_verify_crc(data, data_len, sums, crc_type,
-                            bytes_per_checksum, &error_data);
-  if (likely(ret == CHECKSUMS_VALID)) {
+  ret = bulk_crc(data, data_len, sums, crc_type,
+                            bytes_per_checksum, verify ? &error_data : NULL);
+  if (likely(verify && ret == CHECKSUMS_VALID || !verify && ret == 0)) {
     return;
-  } else if (unlikely(ret == INVALID_CHECKSUM_DETECTED)) {
+  } else if (unlikely(verify && ret == INVALID_CHECKSUM_DETECTED)) {
     long pos = base_pos + (error_data.bad_data - data);
     throw_checksum_exception(
       env, error_data.got_crc, error_data.expected_crc,
       j_filename, pos);
   } else {
     THROW(env, "java/lang/AssertionError",
-      "Bad response code from native bulk_verify_crc");
+      "Bad response code from native bulk_crc");
   }
 }
 
-JNIEXPORT void JNICALL Java_org_apache_hadoop_util_NativeCrc32_nativeVerifyChunkedSumsByteArray
+JNIEXPORT void JNICALL Java_org_apache_hadoop_util_NativeCrc32_nativeComputeChunkedSumsByteArray
   (JNIEnv *env, jclass clazz,
     jint bytes_per_checksum, jint j_crc_type,
     jarray j_sums, jint sums_offset,
     jarray j_data, jint data_offset, jint data_len,
-    jstring j_filename, jlong base_pos)
+    jstring j_filename, jlong base_pos, jboolean verify)
 {
   uint8_t *sums_addr;
   uint8_t *data_addr;
@@ -237,21 +237,21 @@ JNIEXPORT void JNICALL Java_org_apache_hadoop_util_NativeCrc32_nativeVerifyChunk
     data = data_addr + data_offset + checksumNum * bytes_per_checksum;
 
     // Setup complete. Actually verify checksums.
-    ret = bulk_verify_crc(data, MIN(numChecksumsPerIter * bytes_per_checksum,
-                                    data_len - checksumNum * bytes_per_checksum),
-                          sums, crc_type, bytes_per_checksum, &error_data);
+    ret = bulk_crc(data, MIN(numChecksumsPerIter * bytes_per_checksum,
+                             data_len - checksumNum * bytes_per_checksum),
+                   sums, crc_type, bytes_per_checksum, verify ? &error_data : NULL);
     (*env)->ReleasePrimitiveArrayCritical(env, j_data, data_addr, 0);
     (*env)->ReleasePrimitiveArrayCritical(env, j_sums, sums_addr, 0);
-    if (unlikely(ret == INVALID_CHECKSUM_DETECTED)) {
+    if (unlikely(verify && ret == INVALID_CHECKSUM_DETECTED)) {
       long pos = base_pos + (error_data.bad_data - data) + checksumNum *
         bytes_per_checksum;
       throw_checksum_exception(
         env, error_data.got_crc, error_data.expected_crc,
         j_filename, pos);
       return;
-    } else if (unlikely(ret != CHECKSUMS_VALID)) {
+    } else if (unlikely(verify && ret != CHECKSUMS_VALID || !verify && ret != 0)) {
       THROW(env, "java/lang/AssertionError",
-        "Bad response code from native bulk_verify_crc");
+        "Bad response code from native bulk_crc");
       return;
     }
     checksumNum += numChecksumsPerIter;
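From the caller's perspective, the verify flag decides whether mismatches are detected and thrown (verify mode) or checksums are simply written out (calculate mode). A sketch of the observable behavior through the public DataChecksum API (hypothetical class name and sizes):

    import java.nio.ByteBuffer;
    import org.apache.hadoop.fs.ChecksumException;
    import org.apache.hadoop.util.DataChecksum;

    public class VerifyFlagExample {
      public static void main(String[] args) throws Exception {
        DataChecksum sum =
            DataChecksum.newDataChecksum(DataChecksum.Type.CRC32C, 512);
        ByteBuffer data = ByteBuffer.allocateDirect(1024);
        ByteBuffer sums = ByteBuffer.allocateDirect(2 * sum.getChecksumSize());
        sum.calculateChunkedSums(data, sums);      // verify=false underneath
        sums.put(0, (byte) (sums.get(0) ^ 0xff));  // corrupt the first checksum
        try {
          sum.verifyChunkedSums(data, sums, "fake file", 0); // verify=true underneath
        } catch (ChecksumException ce) {
          // Position is basePos plus the offset of the bad chunk; chunk 0 => 0.
          System.out.println("corruption at pos " + ce.getPos());
        }
      }
    }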

+ 21 - 39
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32.c

@@ -55,40 +55,23 @@ static void pipelined_crc32c(uint32_t *crc1, uint32_t *crc2, uint32_t *crc3, con
 static int cached_cpu_supports_crc32; // initialized by constructor below
 static uint32_t crc32c_hardware(uint32_t crc, const uint8_t* data, size_t length);
 
-int bulk_calculate_crc(const uint8_t *data, size_t data_len,
-                    uint32_t *sums, int checksum_type,
-                    int bytes_per_checksum) {
-  uint32_t crc;
-  crc_update_func_t crc_update_func;
-
-  switch (checksum_type) {
-    case CRC32_ZLIB_POLYNOMIAL:
-      crc_update_func = crc32_zlib_sb8;
-      break;
-    case CRC32C_POLYNOMIAL:
-      crc_update_func = crc32c_sb8;
-      break;
-    default:
-      return -EINVAL;
-      break;
+static inline int store_or_verify(uint32_t *sums, uint32_t crc,
+                                   int is_verify) {
+  if (!is_verify) {
+    *sums = crc;
+    return 1;
+  } else {
+    return crc == *sums;
   }
-  while (likely(data_len > 0)) {
-    int len = likely(data_len >= bytes_per_checksum) ? bytes_per_checksum : data_len;
-    crc = CRC_INITIAL_VAL;
-    crc = crc_update_func(crc, data, len);
-    *sums = ntohl(crc_val(crc));
-    data += len;
-    data_len -= len;
-    sums++;
-  }
-  return 0;
 }
 
-int bulk_verify_crc(const uint8_t *data, size_t data_len,
-                    const uint32_t *sums, int checksum_type,
+int bulk_crc(const uint8_t *data, size_t data_len,
+                    uint32_t *sums, int checksum_type,
                     int bytes_per_checksum,
                     crc32_error_t *error_info) {
 
+  int is_verify = error_info != NULL;
+
 #ifdef USE_PIPELINED
   uint32_t crc1, crc2, crc3;
   int n_blocks = data_len / bytes_per_checksum;
@@ -112,7 +95,7 @@ int bulk_verify_crc(const uint8_t *data, size_t data_len,
       }
       break;
     default:
-      return INVALID_CHECKSUM_TYPE;
+      return is_verify ? INVALID_CHECKSUM_TYPE : -EINVAL;
   }
 
 #ifdef USE_PIPELINED
@@ -122,16 +105,15 @@ int bulk_verify_crc(const uint8_t *data, size_t data_len,
       crc1 = crc2 = crc3 = CRC_INITIAL_VAL;
       pipelined_crc32c(&crc1, &crc2, &crc3, data, bytes_per_checksum, 3);
 
-      crc = ntohl(crc_val(crc1));
-      if ((crc = ntohl(crc_val(crc1))) != *sums)
+      if (unlikely(!store_or_verify(sums, (crc = ntohl(crc_val(crc1))), is_verify)))
         goto return_crc_error;
       sums++;
       data += bytes_per_checksum;
-      if ((crc = ntohl(crc_val(crc2))) != *sums)
+      if (unlikely(!store_or_verify(sums, (crc = ntohl(crc_val(crc2))), is_verify)))
         goto return_crc_error;
       sums++;
       data += bytes_per_checksum;
-      if ((crc = ntohl(crc_val(crc3))) != *sums)
+      if (unlikely(!store_or_verify(sums, (crc = ntohl(crc_val(crc3))), is_verify)))
         goto return_crc_error;
       sums++;
       data += bytes_per_checksum;
@@ -143,12 +125,12 @@ int bulk_verify_crc(const uint8_t *data, size_t data_len,
       crc1 = crc2 = crc3 = CRC_INITIAL_VAL;
       pipelined_crc32c(&crc1, &crc2, &crc3, data, bytes_per_checksum, n_blocks);
 
-      if ((crc = ntohl(crc_val(crc1))) != *sums)
+      if (unlikely(!store_or_verify(sums, (crc = ntohl(crc_val(crc1))), is_verify)))
         goto return_crc_error;
       data += bytes_per_checksum;
       sums++;
       if (n_blocks == 2) {
-        if ((crc = ntohl(crc_val(crc2))) != *sums)
+        if (unlikely(!store_or_verify(sums, (crc = ntohl(crc_val(crc2))), is_verify)))
           goto return_crc_error;
         sums++;
         data += bytes_per_checksum;
@@ -160,10 +142,10 @@ int bulk_verify_crc(const uint8_t *data, size_t data_len,
       crc1 = crc2 = crc3 = CRC_INITIAL_VAL;
       pipelined_crc32c(&crc1, &crc2, &crc3, data, remainder, 1);
 
-      if ((crc = ntohl(crc_val(crc1))) != *sums)
+      if (unlikely(!store_or_verify(sums, (crc = ntohl(crc_val(crc1))), is_verify)))
         goto return_crc_error;
     }
-    return CHECKSUMS_VALID;
+    return is_verify ? CHECKSUMS_VALID : 0;
   }
 #endif
 
@@ -172,14 +154,14 @@ int bulk_verify_crc(const uint8_t *data, size_t data_len,
     crc = CRC_INITIAL_VAL;
     crc = crc_update_func(crc, data, len);
     crc = ntohl(crc_val(crc));
-    if (unlikely(crc != *sums)) {
+    if (unlikely(!store_or_verify(sums, crc, is_verify))) {
       goto return_crc_error;
     }
     data += len;
     data_len -= len;
     sums++;
   }
-  return CHECKSUMS_VALID;
+  return is_verify ? CHECKSUMS_VALID : 0;
 
 return_crc_error:
   if (error_info != NULL) {
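The heart of this refactoring is store_or_verify: the old bulk_calculate_crc and bulk_verify_crc shared the same chunking loop, so they are merged into one bulk_crc whose mode is selected by whether error_info is NULL. For illustration only, here is the helper's logic rendered in Java (hypothetical names; the real helper is the C static inline in the diff above):

    public class StoreOrVerify {
      // Calculate mode: store the fresh CRC and always succeed.
      // Verify mode: compare the fresh CRC against the stored one.
      static boolean storeOrVerify(int[] sums, int i, int crc, boolean isVerify) {
        if (!isVerify) {
          sums[i] = crc;
          return true;
        }
        return sums[i] == crc;
      }

      public static void main(String[] args) {
        int[] sums = new int[1];
        int crc = 0xCAFEBABE;
        storeOrVerify(sums, 0, crc, false);                    // store
        System.out.println(storeOrVerify(sums, 0, crc, true)); // true: round-trips
      }
    }

Since is_verify is fixed for the duration of each bulk_crc call, the per-chunk branch is highly predictable, so the merge is unlikely to cost the verification path anything measurable.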

+ 16 - 33
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/bulk_crc32.h

@@ -42,49 +42,32 @@ typedef struct crc32_error {
 
 
 /**
- * Verify a buffer of data which is checksummed in chunks
- * of bytes_per_checksum bytes. The checksums are each 32 bits
- * and are stored in sequential indexes of the 'sums' array.
+ * Either calculates checksums for or verifies a buffer of data.
+ * Checksums performed in chunks of bytes_per_checksum bytes. The checksums
+ * are each 32 bits and are stored in sequential indexes of the 'sums' array.
+ * Verification is done (sums is assumed to already contain the checksums)
+ * if error_info is non-null; otherwise calculation is done and checksums
+ * are stored into sums.
  *
  * @param data                  The data to checksum
  * @param dataLen               Length of the data buffer
- * @param sums                  (out param) buffer to write checksums into.
- *                              It must contain at least dataLen * 4 bytes.
+ * @param sums                  (out param) buffer to write checksums into or
+ *                              where checksums are already stored.
+ *                              It must contain at least
+ *                              ((dataLen - 1) / bytes_per_checksum + 1) * 4 bytes.
  * @param checksum_type         One of the CRC32 algorithm constants defined 
  *                              above
  * @param bytes_per_checksum    How many bytes of data to process per checksum.
- * @param error_info            If non-NULL, will be filled in if an error
- *                              is detected
+ * @param error_info            If non-NULL, verification will be performed and
+ *                              it will be filled in if an error
+ *                              is detected. Otherwise calculation is performed.
  *
  * @return                      0 for success, non-zero for an error, result codes
- *                              for which are defined above
+ *                              for verification are defined above
  */
-extern int bulk_verify_crc(const uint8_t *data, size_t data_len,
-    const uint32_t *sums, int checksum_type,
+extern int bulk_crc(const uint8_t *data, size_t data_len,
+    uint32_t *sums, int checksum_type,
     int bytes_per_checksum,
     crc32_error_t *error_info);
 
-/**
- * Calculate checksums for some data.
- *
- * The checksums are each 32 bits and are stored in sequential indexes of the
- * 'sums' array.
- *
- * This function is not (yet) optimized.  It is provided for testing purposes
- * only.
- *
- * @param data                  The data to checksum
- * @param dataLen               Length of the data buffer
- * @param sums                  (out param) buffer to write checksums into.
- *                              It must contain at least dataLen * 4 bytes.
- * @param checksum_type         One of the CRC32 algorithm constants defined 
- *                              above
- * @param bytesPerChecksum      How many bytes of data to process per checksum.
- *
- * @return                      0 for success, non-zero for an error
- */
-int bulk_calculate_crc(const uint8_t *data, size_t data_len,
-                    uint32_t *sums, int checksum_type,
-                    int bytes_per_checksum);
-
 #endif
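The rewritten contract also corrects the sums sizing: the old comment asked for at least dataLen * 4 bytes, but only one 32-bit checksum per bytes_per_checksum chunk is needed, rounding up. A quick check of the formula with hypothetical numbers:

    public class SumsSizing {
      public static void main(String[] args) {
        int dataLen = 4097, bytesPerChecksum = 512;
        // ((dataLen - 1) / bytes_per_checksum + 1) chunks, 4 bytes each.
        int numChunks = (dataLen - 1) / bytesPerChecksum + 1; // 8 full + 1 partial = 9
        int sumsBytes = numChunks * 4;                        // 36
        System.out.println(numChunks + " chunks -> " + sumsBytes + " bytes of checksums");
      }
    }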

+ 3 - 3
hadoop-common-project/hadoop-common/src/main/native/src/test/org/apache/hadoop/util/test_bulk_crc32.c

@@ -48,9 +48,9 @@ static int testBulkVerifyCrc(int dataLen, int crcType, int bytesPerChecksum)
   sums = calloc(sizeof(uint32_t),
                 (dataLen + bytesPerChecksum - 1) / bytesPerChecksum);
 
-  EXPECT_ZERO(bulk_calculate_crc(data, dataLen, sums, crcType,
-                                 bytesPerChecksum));
-  EXPECT_ZERO(bulk_verify_crc(data, dataLen, sums, crcType,
+  EXPECT_ZERO(bulk_crc(data, dataLen, sums, crcType,
+                                 bytesPerChecksum, NULL));
+  EXPECT_ZERO(bulk_crc(data, dataLen, sums, crcType,
                             bytesPerChecksum, &errorData));
   free(data);
   free(sums);

+ 101 - 53
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestDataChecksum.java

@@ -19,6 +19,9 @@ package org.apache.hadoop.util;
 
 import java.nio.ByteBuffer;
 import java.util.Random;
+import java.util.concurrent.TimeUnit;
+
+import com.google.common.base.Stopwatch;
 
 import org.apache.hadoop.fs.ChecksumException;
 import org.junit.Test;
@@ -53,68 +56,113 @@ public class TestDataChecksum {
       }
     }
   }
-  
-  private void doBulkTest(DataChecksum checksum, int dataLength,
-      boolean useDirect) throws Exception {
-    System.err.println("Testing bulk checksums of length " + 
-        dataLength + " with " +
-        (useDirect ? "direct" : "array-backed") + " buffers");
-    int numSums = (dataLength - 1)/checksum.getBytesPerChecksum() + 1;
-    int sumsLength = numSums * checksum.getChecksumSize();
-    
-    byte data[] = new byte[dataLength +
-                           DATA_OFFSET_IN_BUFFER +
-                           DATA_TRAILER_IN_BUFFER];
-    new Random().nextBytes(data);
-    ByteBuffer dataBuf = ByteBuffer.wrap(
+
+  private static class Harness {
+    final DataChecksum checksum;
+    final int dataLength, sumsLength, numSums;
+    ByteBuffer dataBuf, checksumBuf;
+
+    Harness(DataChecksum checksum, int dataLength, boolean useDirect) {
+      this.checksum = checksum;
+      this.dataLength = dataLength;
+
+      numSums = (dataLength - 1)/checksum.getBytesPerChecksum() + 1;
+      sumsLength = numSums * checksum.getChecksumSize();
+
+      byte data[] = new byte[dataLength +
+                             DATA_OFFSET_IN_BUFFER +
+                             DATA_TRAILER_IN_BUFFER];
+      new Random().nextBytes(data);
+      dataBuf = ByteBuffer.wrap(
         data, DATA_OFFSET_IN_BUFFER, dataLength);
 
-    byte checksums[] = new byte[SUMS_OFFSET_IN_BUFFER + sumsLength];
-    ByteBuffer checksumBuf = ByteBuffer.wrap(
+      byte checksums[] = new byte[SUMS_OFFSET_IN_BUFFER + sumsLength];
+      checksumBuf = ByteBuffer.wrap(
         checksums, SUMS_OFFSET_IN_BUFFER, sumsLength);
-    
-    // Swap out for direct buffers if requested.
-    if (useDirect) {
-      dataBuf = directify(dataBuf);
-      checksumBuf = directify(checksumBuf);
+
+      // Swap out for direct buffers if requested.
+      if (useDirect) {
+        dataBuf = directify(dataBuf);
+        checksumBuf = directify(checksumBuf);
+      }
     }
-    
-    // calculate real checksum, make sure it passes
-    checksum.calculateChunkedSums(dataBuf, checksumBuf);
-    checksum.verifyChunkedSums(dataBuf, checksumBuf, "fake file", 0);
-
-    // Change a byte in the header and in the trailer, make sure
-    // it doesn't affect checksum result
-    corruptBufferOffset(checksumBuf, 0);
-    checksum.verifyChunkedSums(dataBuf, checksumBuf, "fake file", 0);
-    corruptBufferOffset(dataBuf, 0);
-    dataBuf.limit(dataBuf.limit() + 1);
-    corruptBufferOffset(dataBuf, dataLength + DATA_OFFSET_IN_BUFFER);
-    dataBuf.limit(dataBuf.limit() - 1);
-    checksum.verifyChunkedSums(dataBuf, checksumBuf, "fake file", 0);    
-    
-    // Make sure bad checksums fail - error at beginning of array
-    corruptBufferOffset(checksumBuf, SUMS_OFFSET_IN_BUFFER);
-    try {
+
+    void testCorrectness() throws ChecksumException {
+      // calculate real checksum, make sure it passes
+      checksum.calculateChunkedSums(dataBuf, checksumBuf);
       checksum.verifyChunkedSums(dataBuf, checksumBuf, "fake file", 0);
-      fail("Did not throw on bad checksums");
-    } catch (ChecksumException ce) {
-      assertEquals(0, ce.getPos());
-    }
 
 
-    // Make sure bad checksums fail - error at end of array
-    uncorruptBufferOffset(checksumBuf, SUMS_OFFSET_IN_BUFFER);
-    corruptBufferOffset(checksumBuf, SUMS_OFFSET_IN_BUFFER + sumsLength - 1);
-    try {
+      // Change a byte in the header and in the trailer, make sure
+      // it doesn't affect checksum result
+      corruptBufferOffset(checksumBuf, 0);
+      checksum.verifyChunkedSums(dataBuf, checksumBuf, "fake file", 0);
+      corruptBufferOffset(dataBuf, 0);
+      dataBuf.limit(dataBuf.limit() + 1);
+      corruptBufferOffset(dataBuf, dataLength + DATA_OFFSET_IN_BUFFER);
+      dataBuf.limit(dataBuf.limit() - 1);
       checksum.verifyChunkedSums(dataBuf, checksumBuf, "fake file", 0);
-      fail("Did not throw on bad checksums");
-    } catch (ChecksumException ce) {
-      int expectedPos = checksum.getBytesPerChecksum() * (numSums - 1);
-      assertEquals(expectedPos, ce.getPos());
-      assertTrue(ce.getMessage().contains("fake file"));
+
+      // Make sure bad checksums fail - error at beginning of array
+      corruptBufferOffset(checksumBuf, SUMS_OFFSET_IN_BUFFER);
+      try {
+        checksum.verifyChunkedSums(dataBuf, checksumBuf, "fake file", 0);
+        fail("Did not throw on bad checksums");
+      } catch (ChecksumException ce) {
+        assertEquals(0, ce.getPos());
+      }
+
+      // Make sure bad checksums fail - error at end of array
+      uncorruptBufferOffset(checksumBuf, SUMS_OFFSET_IN_BUFFER);
+      corruptBufferOffset(checksumBuf, SUMS_OFFSET_IN_BUFFER + sumsLength - 1);
+      try {
+        checksum.verifyChunkedSums(dataBuf, checksumBuf, "fake file", 0);
+        fail("Did not throw on bad checksums");
+      } catch (ChecksumException ce) {
+        int expectedPos = checksum.getBytesPerChecksum() * (numSums - 1);
+        assertEquals(expectedPos, ce.getPos());
+        assertTrue(ce.getMessage().contains("fake file"));
+      }
     }
   }
-  
+
+  private void doBulkTest(DataChecksum checksum, int dataLength,
+      boolean useDirect) throws Exception {
+    System.err.println("Testing bulk checksums of length " +
+        dataLength + " with " +
+        (useDirect ? "direct" : "array-backed") + " buffers");
+
+    new Harness(checksum, dataLength, useDirect).testCorrectness();
+  }
+
+  /**
+   * Simple performance test for the "common case" checksum usage in HDFS:
+   * computing and verifying CRC32C with 512 byte chunking on native
+   * buffers.
+   */
+  @Test
+  public void commonUsagePerfTest() throws Exception {
+    final int NUM_RUNS = 5;
+    final DataChecksum checksum = DataChecksum.newDataChecksum(DataChecksum.Type.CRC32C, 512);
+    final int dataLength = 512 * 1024 * 1024;
+    Harness h = new Harness(checksum, dataLength, true);
+
+    for (int i = 0; i < NUM_RUNS; i++) {
+      Stopwatch s = new Stopwatch().start();
+      // calculate real checksum, make sure it passes
+      checksum.calculateChunkedSums(h.dataBuf, h.checksumBuf);
+      s.stop();
+      System.err.println("Calculate run #" + i + ": " +
+                         s.elapsedTime(TimeUnit.MICROSECONDS) + "us");
+
+      s = new Stopwatch().start();
+      // calculate real checksum, make sure it passes
+      checksum.verifyChunkedSums(h.dataBuf, h.checksumBuf, "fake file", 0);
+      s.stop();
+      System.err.println("Verify run #" + i + ": " +
+                         s.elapsedTime(TimeUnit.MICROSECONDS) + "us");
+    }
+  }
+
   @Test
   public void testEquality() {
     assertEquals(