|
@@ -18,13 +18,14 @@
|
|
|
|
|
|
package org.apache.hadoop.fs;
|
|
package org.apache.hadoop.fs;
|
|
|
|
|
|
|
|
+import org.apache.hadoop.classification.InterfaceAudience;
|
|
|
|
+import org.apache.hadoop.classification.InterfaceStability;
|
|
|
|
+import org.apache.hadoop.util.DataChecksum;
|
|
|
|
+
|
|
import java.io.IOException;
|
|
import java.io.IOException;
|
|
import java.io.OutputStream;
|
|
import java.io.OutputStream;
|
|
import java.util.zip.Checksum;
|
|
import java.util.zip.Checksum;
|
|
|
|
|
|
-import org.apache.hadoop.classification.InterfaceAudience;
|
|
|
|
-import org.apache.hadoop.classification.InterfaceStability;
|
|
|
|
-
|
|
|
|
/**
|
|
/**
|
|
* This is a generic output stream for generating checksums for
|
|
* This is a generic output stream for generating checksums for
|
|
* data before it is written to the underlying stream
|
|
* data before it is written to the underlying stream
|
|
@@ -33,7 +34,7 @@ import org.apache.hadoop.classification.InterfaceStability;
|
|
@InterfaceStability.Unstable
|
|
@InterfaceStability.Unstable
|
|
abstract public class FSOutputSummer extends OutputStream {
|
|
abstract public class FSOutputSummer extends OutputStream {
|
|
// data checksum
|
|
// data checksum
|
|
- private Checksum sum;
|
|
|
|
|
|
+ private final DataChecksum sum;
|
|
// internal buffer for storing data before it is checksumed
|
|
// internal buffer for storing data before it is checksumed
|
|
private byte buf[];
|
|
private byte buf[];
|
|
// internal buffer for storing checksum
|
|
// internal buffer for storing checksum
|
|
@@ -41,18 +42,24 @@ abstract public class FSOutputSummer extends OutputStream {
|
|
// The number of valid bytes in the buffer.
|
|
// The number of valid bytes in the buffer.
|
|
private int count;
|
|
private int count;
|
|
|
|
|
|
- protected FSOutputSummer(Checksum sum, int maxChunkSize, int checksumSize) {
|
|
|
|
|
|
+ // We want this value to be a multiple of 3 because the native code checksums
|
|
|
|
+ // 3 chunks simultaneously. The chosen value of 9 strikes a balance between
|
|
|
|
+ // limiting the number of JNI calls and flushing to the underlying stream
|
|
|
|
+ // relatively frequently.
|
|
|
|
+ private static final int BUFFER_NUM_CHUNKS = 9;
|
|
|
|
+
|
|
|
|
+ protected FSOutputSummer(DataChecksum sum) {
|
|
this.sum = sum;
|
|
this.sum = sum;
|
|
- this.buf = new byte[maxChunkSize];
|
|
|
|
- this.checksum = new byte[checksumSize];
|
|
|
|
|
|
+ this.buf = new byte[sum.getBytesPerChecksum() * BUFFER_NUM_CHUNKS];
|
|
|
|
+ this.checksum = new byte[sum.getChecksumSize() * BUFFER_NUM_CHUNKS];
|
|
this.count = 0;
|
|
this.count = 0;
|
|
}
|
|
}
|
|
|
|
|
|
/* write the data chunk in <code>b</code> staring at <code>offset</code> with
|
|
/* write the data chunk in <code>b</code> staring at <code>offset</code> with
|
|
- * a length of <code>len</code>, and its checksum
|
|
|
|
|
|
+ * a length of <code>len > 0</code>, and its checksum
|
|
*/
|
|
*/
|
|
- protected abstract void writeChunk(byte[] b, int offset, int len, byte[] checksum)
|
|
|
|
- throws IOException;
|
|
|
|
|
|
+ protected abstract void writeChunk(byte[] b, int bOffset, int bLen,
|
|
|
|
+ byte[] checksum, int checksumOffset, int checksumLen) throws IOException;
|
|
|
|
|
|
/**
|
|
/**
|
|
* Check if the implementing OutputStream is closed and should no longer
|
|
* Check if the implementing OutputStream is closed and should no longer
|
|
@@ -66,7 +73,6 @@ abstract public class FSOutputSummer extends OutputStream {
|
|
/** Write one byte */
|
|
/** Write one byte */
|
|
@Override
|
|
@Override
|
|
public synchronized void write(int b) throws IOException {
|
|
public synchronized void write(int b) throws IOException {
|
|
- sum.update(b);
|
|
|
|
buf[count++] = (byte)b;
|
|
buf[count++] = (byte)b;
|
|
if(count == buf.length) {
|
|
if(count == buf.length) {
|
|
flushBuffer();
|
|
flushBuffer();
|
|
@@ -111,18 +117,17 @@ abstract public class FSOutputSummer extends OutputStream {
|
|
*/
|
|
*/
|
|
private int write1(byte b[], int off, int len) throws IOException {
|
|
private int write1(byte b[], int off, int len) throws IOException {
|
|
if(count==0 && len>=buf.length) {
|
|
if(count==0 && len>=buf.length) {
|
|
- // local buffer is empty and user data has one chunk
|
|
|
|
- // checksum and output data
|
|
|
|
|
|
+ // local buffer is empty and user buffer size >= local buffer size, so
|
|
|
|
+ // simply checksum the user buffer and send it directly to the underlying
|
|
|
|
+ // stream
|
|
final int length = buf.length;
|
|
final int length = buf.length;
|
|
- sum.update(b, off, length);
|
|
|
|
- writeChecksumChunk(b, off, length, false);
|
|
|
|
|
|
+ writeChecksumChunks(b, off, length);
|
|
return length;
|
|
return length;
|
|
}
|
|
}
|
|
|
|
|
|
// copy user data to local buffer
|
|
// copy user data to local buffer
|
|
int bytesToCopy = buf.length-count;
|
|
int bytesToCopy = buf.length-count;
|
|
bytesToCopy = (len<bytesToCopy) ? len : bytesToCopy;
|
|
bytesToCopy = (len<bytesToCopy) ? len : bytesToCopy;
|
|
- sum.update(b, off, bytesToCopy);
|
|
|
|
System.arraycopy(b, off, buf, count, bytesToCopy);
|
|
System.arraycopy(b, off, buf, count, bytesToCopy);
|
|
count += bytesToCopy;
|
|
count += bytesToCopy;
|
|
if (count == buf.length) {
|
|
if (count == buf.length) {
|
|
@@ -136,22 +141,45 @@ abstract public class FSOutputSummer extends OutputStream {
|
|
* the underlying output stream.
|
|
* the underlying output stream.
|
|
*/
|
|
*/
|
|
protected synchronized void flushBuffer() throws IOException {
|
|
protected synchronized void flushBuffer() throws IOException {
|
|
- flushBuffer(false);
|
|
|
|
|
|
+ flushBuffer(false, true);
|
|
}
|
|
}
|
|
|
|
|
|
- /* Forces any buffered output bytes to be checksumed and written out to
|
|
|
|
- * the underlying output stream. If keep is true, then the state of
|
|
|
|
- * this object remains intact.
|
|
|
|
|
|
+ /* Forces buffered output bytes to be checksummed and written out to
|
|
|
|
+ * the underlying output stream. If there is a trailing partial chunk in the
|
|
|
|
+ * buffer,
|
|
|
|
+ * 1) flushPartial tells us whether to flush that chunk
|
|
|
|
+ * 2) if flushPartial is true, keep tells us whether to keep that chunk in the
|
|
|
|
+ * buffer (if flushPartial is false, it is always kept in the buffer)
|
|
|
|
+ *
|
|
|
|
+ * Returns the number of bytes that were flushed but are still left in the
|
|
|
|
+ * buffer (can only be non-zero if keep is true).
|
|
*/
|
|
*/
|
|
- protected synchronized void flushBuffer(boolean keep) throws IOException {
|
|
|
|
- if (count != 0) {
|
|
|
|
- int chunkLen = count;
|
|
|
|
|
|
+ protected synchronized int flushBuffer(boolean keep,
|
|
|
|
+ boolean flushPartial) throws IOException {
|
|
|
|
+ int bufLen = count;
|
|
|
|
+ int partialLen = bufLen % sum.getBytesPerChecksum();
|
|
|
|
+ int lenToFlush = flushPartial ? bufLen : bufLen - partialLen;
|
|
|
|
+ if (lenToFlush != 0) {
|
|
|
|
+ writeChecksumChunks(buf, 0, lenToFlush);
|
|
|
|
+ if (!flushPartial || keep) {
|
|
|
|
+ count = partialLen;
|
|
|
|
+ System.arraycopy(buf, bufLen - count, buf, 0, count);
|
|
|
|
+ } else {
|
|
count = 0;
|
|
count = 0;
|
|
- writeChecksumChunk(buf, 0, chunkLen, keep);
|
|
|
|
- if (keep) {
|
|
|
|
- count = chunkLen;
|
|
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
+
|
|
|
|
+ // total bytes left minus unflushed bytes left
|
|
|
|
+ return count - (bufLen - lenToFlush);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Checksums all complete data chunks and flushes them to the underlying
|
|
|
|
+ * stream. If there is a trailing partial chunk, it is not flushed and is
|
|
|
|
+ * maintained in the buffer.
|
|
|
|
+ */
|
|
|
|
+ public void flush() throws IOException {
|
|
|
|
+ flushBuffer(false, false);
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
/**
|
|
@@ -161,18 +189,18 @@ abstract public class FSOutputSummer extends OutputStream {
|
|
return count;
|
|
return count;
|
|
}
|
|
}
|
|
|
|
|
|
- /** Generate checksum for the data chunk and output data chunk & checksum
|
|
|
|
- * to the underlying output stream. If keep is true then keep the
|
|
|
|
- * current checksum intact, do not reset it.
|
|
|
|
|
|
+ /** Generate checksums for the given data chunks and output chunks & checksums
|
|
|
|
+ * to the underlying output stream.
|
|
*/
|
|
*/
|
|
- private void writeChecksumChunk(byte b[], int off, int len, boolean keep)
|
|
|
|
|
|
+ private void writeChecksumChunks(byte b[], int off, int len)
|
|
throws IOException {
|
|
throws IOException {
|
|
- int tempChecksum = (int)sum.getValue();
|
|
|
|
- if (!keep) {
|
|
|
|
- sum.reset();
|
|
|
|
|
|
+ sum.calculateChunkedSums(b, off, len, checksum, 0);
|
|
|
|
+ for (int i = 0; i < len; i += sum.getBytesPerChecksum()) {
|
|
|
|
+ int chunkLen = Math.min(sum.getBytesPerChecksum(), len - i);
|
|
|
|
+ int ckOffset = i / sum.getBytesPerChecksum() * sum.getChecksumSize();
|
|
|
|
+ writeChunk(b, off + i, chunkLen, checksum, ckOffset,
|
|
|
|
+ sum.getChecksumSize());
|
|
}
|
|
}
|
|
- int2byte(tempChecksum, checksum);
|
|
|
|
- writeChunk(b, off, len, checksum);
|
|
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
/**
|
|
@@ -196,9 +224,14 @@ abstract public class FSOutputSummer extends OutputStream {
|
|
/**
|
|
/**
|
|
* Resets existing buffer with a new one of the specified size.
|
|
* Resets existing buffer with a new one of the specified size.
|
|
*/
|
|
*/
|
|
- protected synchronized void resetChecksumChunk(int size) {
|
|
|
|
- sum.reset();
|
|
|
|
|
|
+ protected synchronized void setChecksumBufSize(int size) {
|
|
this.buf = new byte[size];
|
|
this.buf = new byte[size];
|
|
|
|
+ this.checksum = new byte[((size - 1) / sum.getBytesPerChecksum() + 1) *
|
|
|
|
+ sum.getChecksumSize()];
|
|
this.count = 0;
|
|
this.count = 0;
|
|
}
|
|
}
|
|
|
|
+
|
|
|
|
+ protected synchronized void resetChecksumBufSize() {
|
|
|
|
+ setChecksumBufSize(sum.getBytesPerChecksum() * BUFFER_NUM_CHUNKS);
|
|
|
|
+ }
|
|
}
|
|
}
|