|
@@ -21,10 +21,12 @@ package org.apache.hadoop.util;
|
|
|
import java.io.DataInputStream;
|
|
|
import java.io.DataOutputStream;
|
|
|
import java.io.IOException;
|
|
|
+import java.nio.ByteBuffer;
|
|
|
import java.util.zip.Checksum;
|
|
|
|
|
|
import org.apache.hadoop.classification.InterfaceAudience;
|
|
|
import org.apache.hadoop.classification.InterfaceStability;
|
|
|
+import org.apache.hadoop.fs.ChecksumException;
|
|
|
|
|
|
/**
|
|
|
* This class provides interface and utilities for processing checksums for
|
|
@@ -233,6 +235,157 @@ public class DataChecksum implements Checksum {
|
|
|
inSum += 1;
|
|
|
}
|
|
|
|
|
|
+ /**
|
|
|
+ * Verify that the given checksums match the given data.
|
|
|
+ *
|
|
|
+ * The 'mark' of the ByteBuffer parameters may be modified by this function,.
|
|
|
+ * but the position is maintained.
|
|
|
+ *
|
|
|
+ * @param data the DirectByteBuffer pointing to the data to verify.
|
|
|
+ * @param checksums the DirectByteBuffer pointing to a series of stored
|
|
|
+ * checksums
|
|
|
+ * @param fileName the name of the file being read, for error-reporting
|
|
|
+ * @param basePos the file position to which the start of 'data' corresponds
|
|
|
+ * @throws ChecksumException if the checksums do not match
|
|
|
+ */
|
|
|
+ public void verifyChunkedSums(ByteBuffer data, ByteBuffer checksums,
|
|
|
+ String fileName, long basePos)
|
|
|
+ throws ChecksumException {
|
|
|
+ if (size == 0) return;
|
|
|
+
|
|
|
+ if (data.hasArray() && checksums.hasArray()) {
|
|
|
+ verifyChunkedSums(
|
|
|
+ data.array(), data.arrayOffset() + data.position(), data.remaining(),
|
|
|
+ checksums.array(), checksums.arrayOffset() + checksums.position(),
|
|
|
+ fileName, basePos);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ int startDataPos = data.position();
|
|
|
+ data.mark();
|
|
|
+ checksums.mark();
|
|
|
+ try {
|
|
|
+ byte[] buf = new byte[bytesPerChecksum];
|
|
|
+ byte[] sum = new byte[size];
|
|
|
+ while (data.remaining() > 0) {
|
|
|
+ int n = Math.min(data.remaining(), bytesPerChecksum);
|
|
|
+ checksums.get(sum);
|
|
|
+ data.get(buf, 0, n);
|
|
|
+ summer.reset();
|
|
|
+ summer.update(buf, 0, n);
|
|
|
+ int calculated = (int)summer.getValue();
|
|
|
+ int stored = (sum[0] << 24 & 0xff000000) |
|
|
|
+ (sum[1] << 16 & 0xff0000) |
|
|
|
+ (sum[2] << 8 & 0xff00) |
|
|
|
+ sum[3] & 0xff;
|
|
|
+ if (calculated != stored) {
|
|
|
+ long errPos = basePos + data.position() - startDataPos - n;
|
|
|
+ throw new ChecksumException(
|
|
|
+ "Checksum error: "+ fileName + " at "+ errPos +
|
|
|
+ " exp: " + stored + " got: " + calculated, errPos);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } finally {
|
|
|
+ data.reset();
|
|
|
+ checksums.reset();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Implementation of chunked verification specifically on byte arrays. This
|
|
|
+ * is to avoid the copy when dealing with ByteBuffers that have array backing.
|
|
|
+ */
|
|
|
+ private void verifyChunkedSums(
|
|
|
+ byte[] data, int dataOff, int dataLen,
|
|
|
+ byte[] checksums, int checksumsOff, String fileName,
|
|
|
+ long basePos) throws ChecksumException {
|
|
|
+
|
|
|
+ int remaining = dataLen;
|
|
|
+ int dataPos = 0;
|
|
|
+ while (remaining > 0) {
|
|
|
+ int n = Math.min(remaining, bytesPerChecksum);
|
|
|
+
|
|
|
+ summer.reset();
|
|
|
+ summer.update(data, dataOff + dataPos, n);
|
|
|
+ dataPos += n;
|
|
|
+ remaining -= n;
|
|
|
+
|
|
|
+ int calculated = (int)summer.getValue();
|
|
|
+ int stored = (checksums[checksumsOff] << 24 & 0xff000000) |
|
|
|
+ (checksums[checksumsOff + 1] << 16 & 0xff0000) |
|
|
|
+ (checksums[checksumsOff + 2] << 8 & 0xff00) |
|
|
|
+ checksums[checksumsOff + 3] & 0xff;
|
|
|
+ checksumsOff += 4;
|
|
|
+ if (calculated != stored) {
|
|
|
+ long errPos = basePos + dataPos - n;
|
|
|
+ throw new ChecksumException(
|
|
|
+ "Checksum error: "+ fileName + " at "+ errPos +
|
|
|
+ " exp: " + stored + " got: " + calculated, errPos);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Calculate checksums for the given data.
|
|
|
+ *
|
|
|
+ * The 'mark' of the ByteBuffer parameters may be modified by this function,
|
|
|
+ * but the position is maintained.
|
|
|
+ *
|
|
|
+ * @param data the DirectByteBuffer pointing to the data to checksum.
|
|
|
+ * @param checksums the DirectByteBuffer into which checksums will be
|
|
|
+ * stored. Enough space must be available in this
|
|
|
+ * buffer to put the checksums.
|
|
|
+ */
|
|
|
+ public void calculateChunkedSums(ByteBuffer data, ByteBuffer checksums) {
|
|
|
+ if (size == 0) return;
|
|
|
+
|
|
|
+ if (data.hasArray() && checksums.hasArray()) {
|
|
|
+ calculateChunkedSums(data.array(), data.arrayOffset() + data.position(), data.remaining(),
|
|
|
+ checksums.array(), checksums.arrayOffset() + checksums.position());
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ data.mark();
|
|
|
+ checksums.mark();
|
|
|
+ try {
|
|
|
+ byte[] buf = new byte[bytesPerChecksum];
|
|
|
+ while (data.remaining() > 0) {
|
|
|
+ int n = Math.min(data.remaining(), bytesPerChecksum);
|
|
|
+ data.get(buf, 0, n);
|
|
|
+ summer.reset();
|
|
|
+ summer.update(buf, 0, n);
|
|
|
+ checksums.putInt((int)summer.getValue());
|
|
|
+ }
|
|
|
+ } finally {
|
|
|
+ data.reset();
|
|
|
+ checksums.reset();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Implementation of chunked calculation specifically on byte arrays. This
|
|
|
+ * is to avoid the copy when dealing with ByteBuffers that have array backing.
|
|
|
+ */
|
|
|
+ private void calculateChunkedSums(
|
|
|
+ byte[] data, int dataOffset, int dataLength,
|
|
|
+ byte[] sums, int sumsOffset) {
|
|
|
+
|
|
|
+ int remaining = dataLength;
|
|
|
+ while (remaining > 0) {
|
|
|
+ int n = Math.min(remaining, bytesPerChecksum);
|
|
|
+ summer.reset();
|
|
|
+ summer.update(data, dataOffset, n);
|
|
|
+ dataOffset += n;
|
|
|
+ remaining -= n;
|
|
|
+ long calculated = summer.getValue();
|
|
|
+ sums[sumsOffset++] = (byte) (calculated >> 24);
|
|
|
+ sums[sumsOffset++] = (byte) (calculated >> 16);
|
|
|
+ sums[sumsOffset++] = (byte) (calculated >> 8);
|
|
|
+ sums[sumsOffset++] = (byte) (calculated);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
/**
|
|
|
* This just provides a dummy implementation for Checksum class
|
|
|
* This is used when there is no checksum available or required for
|