@@ -17,72 +17,95 @@
  */
 package org.apache.hadoop.hdfs.client.impl;
 
-import java.io.BufferedInputStream;
 import java.io.BufferedOutputStream;
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.io.IOException;
+import java.io.OutputStream;
+import java.net.InetSocketAddress;
 import java.nio.ByteBuffer;
+import java.nio.channels.ReadableByteChannel;
 import java.util.EnumSet;
+import java.util.UUID;
 
 import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.fs.FSInputChecker;
-import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.ReadOption;
 import org.apache.hadoop.hdfs.BlockReader;
 import org.apache.hadoop.hdfs.PeerCache;
 import org.apache.hadoop.hdfs.net.Peer;
-import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.DatanodeID;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtoUtil;
 import org.apache.hadoop.hdfs.protocol.datatransfer.PacketHeader;
+import org.apache.hadoop.hdfs.protocol.datatransfer.PacketReceiver;
 import org.apache.hadoop.hdfs.protocol.datatransfer.Sender;
 import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto;
+import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.ClientReadStatusProto;
 import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.ReadOpChecksumInfoProto;
 import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status;
 import org.apache.hadoop.hdfs.protocolPB.PBHelperClient;
 import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
 import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
 import org.apache.hadoop.hdfs.shortcircuit.ClientMmap;
-import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.util.DataChecksum;
 import org.apache.htrace.core.TraceScope;
+
+import com.google.common.annotations.VisibleForTesting;
+
 import org.apache.htrace.core.Tracer;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-
 /**
- * @deprecated this is an old implementation that is being left around
- * in case any issues spring up with the new {@link BlockReaderRemote2}
- * implementation.
- * It will be removed in the next release.
+ * This is a wrapper around a connection to a datanode that understands
+ * checksums, offsets, etc.
+ *
+ * Terminology:
+ * <dl>
+ *   <dt>block</dt>
+ *   <dd>The hdfs block, typically large (~64MB).
+ *   </dd>
+ *   <dt>chunk</dt>
+ *   <dd>A block is divided into chunks, each of which comes with a checksum.
+ *       We want transfers to be chunk-aligned, to be able to
+ *       verify checksums.
+ *   </dd>
+ *   <dt>packet</dt>
+ *   <dd>A grouping of chunks used for transport. It contains a
+ *       header, followed by checksum data, followed by real data.
+ *   </dd>
+ * </dl>
+ * Please see DataNode for the RPC specification.
+ *
+ * This is a new implementation introduced in Hadoop 0.23 which
+ * is more efficient and simpler than the older BlockReader
+ * implementation. It was renamed from BlockReaderRemote2 to BlockReaderRemote.
+ *
  */
 @InterfaceAudience.Private
-@Deprecated
-public class BlockReaderRemote extends FSInputChecker implements BlockReader {
-  static final Logger LOG = LoggerFactory.getLogger(FSInputChecker.class);
+public class BlockReaderRemote implements BlockReader {
+
+  static final Logger LOG = LoggerFactory.getLogger(BlockReaderRemote.class);
+  static final int TCP_WINDOW_SIZE = 128 * 1024; // 128 KB
+
+  final private Peer peer;
+  final private DatanodeID datanodeID;
+  final private PeerCache peerCache;
+  final private long blockId;
+  private final ReadableByteChannel in;
 
-  private final Peer peer;
-  private final DatanodeID datanodeID;
-  private final DataInputStream in;
   private DataChecksum checksum;
+  private final PacketReceiver packetReceiver = new PacketReceiver(true);
+
+  private ByteBuffer curDataSlice = null;
 
   /** offset in block of the last chunk received */
-  private long lastChunkOffset = -1;
-  private long lastChunkLen = -1;
   private long lastSeqNo = -1;
 
   /** offset in block where reader wants to actually read */
   private long startOffset;
-
-  private final long blockId;
-
-  /** offset in block of of first chunk - may be less than startOffset
-      if startOffset is not chunk-aligned */
-  private final long firstChunkOffset;
+  private final String filename;
 
   private final int bytesPerChecksum;
   private final int checksumSize;
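To make the terminology in the class javadoc concrete: readNextPacket() below sizes each packet's checksum region from its data length, one checksum per (possibly partial) chunk. A self-contained sketch of that arithmetic, using illustrative values (512-byte chunks and 4-byte CRC32C checksums are common HDFS defaults, not values taken from this patch):

// ChunkMathSketch.java -- standalone illustration of the chunk/checksum
// arithmetic used by readNextPacket(); all values here are assumptions.
public class ChunkMathSketch {
  public static void main(String[] args) {
    int bytesPerChecksum = 512;  // chunk size (typical HDFS default)
    int checksumSize = 4;        // CRC32C checksum width
    int dataLen = 65536;         // payload bytes in one packet

    // Ceiling division: the last chunk of a packet may be partial.
    int chunks = 1 + (dataLen - 1) / bytesPerChecksum;
    int checksumsLen = chunks * checksumSize;

    System.out.println(chunks);        // 128
    System.out.println(checksumsLen);  // 512
  }
}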
@@ -92,269 +115,182 @@ public class BlockReaderRemote extends FSInputChecker implements BlockReader {
    * This is the amount that the user has requested plus some padding
    * at the beginning so that the read can begin on a chunk boundary.
    */
-  private final long bytesNeededToFinish;
+  private long bytesNeededToFinish;
 
-  private boolean eos = false;
-  private boolean sentStatusCode = false;
-
-  ByteBuffer checksumBytes = null;
-  /** Amount of unread data in the current received packet */
-  int dataLeft = 0;
+  private final boolean verifyChecksum;
 
-  private final PeerCache peerCache;
+  private boolean sentStatusCode = false;
 
   private final Tracer tracer;
 
   private final int networkDistance;
 
-  /* FSInputChecker interface */
+  @VisibleForTesting
+  public Peer getPeer() {
+    return peer;
+  }
 
-  /* same interface as inputStream java.io.InputStream#read()
-   * used by DFSInputStream#read()
-   * This violates one rule when there is a checksum error:
-   * "Read should not modify user buffer before successful read"
-   * because it first reads the data to user buffer and then checks
-   * the checksum.
-   */
   @Override
   public synchronized int read(byte[] buf, int off, int len)
       throws IOException {
-
-    // This has to be set here, *before* the skip, since we can
-    // hit EOS during the skip, in the case that our entire read
-    // is smaller than the checksum chunk.
-    boolean eosBefore = eos;
-
-    //for the first read, skip the extra bytes at the front.
-    if (lastChunkLen < 0 && startOffset > firstChunkOffset && len > 0) {
-      // Skip these bytes. But don't call this.skip()!
-      int toSkip = (int)(startOffset - firstChunkOffset);
-      if ( super.readAndDiscard(toSkip) != toSkip ) {
-        // should never happen
-        throw new IOException("Could not skip required number of bytes");
+    UUID randomId = (LOG.isTraceEnabled() ? UUID.randomUUID() : null);
+    LOG.trace("Starting read #{} file {} from datanode {}",
+        randomId, filename, datanodeID.getHostName());
+
+    if (curDataSlice == null ||
+        (curDataSlice.remaining() == 0 && bytesNeededToFinish > 0)) {
+      try (TraceScope ignored = tracer.newScope(
+          "BlockReaderRemote#readNextPacket(" + blockId + ")")) {
+        readNextPacket();
       }
     }
 
-    int nRead = super.read(buf, off, len);
+    LOG.trace("Finishing read #{}", randomId);
 
-    // if eos was set in the previous read, send a status code to the DN
-    if (eos && !eosBefore && nRead >= 0) {
-      if (needChecksum()) {
-        sendReadResult(peer, Status.CHECKSUM_OK);
-      } else {
-        sendReadResult(peer, Status.SUCCESS);
-      }
+    if (curDataSlice.remaining() == 0) {
+      // we're at EOF now
+      return -1;
     }
+
+    int nRead = Math.min(curDataSlice.remaining(), len);
+    curDataSlice.get(buf, off, nRead);
+
     return nRead;
   }
 
+
   @Override
-  public synchronized long skip(long n) throws IOException {
-    /* How can we make sure we don't throw a ChecksumException, at least
-     * in majority of the cases?. This one throws. */
-    long nSkipped = 0;
-    while (nSkipped < n) {
-      int toSkip = (int)Math.min(n-nSkipped, Integer.MAX_VALUE);
-      int ret = readAndDiscard(toSkip);
-      if (ret <= 0) {
-        return nSkipped;
+  public synchronized int read(ByteBuffer buf) throws IOException {
+    if (curDataSlice == null ||
+        (curDataSlice.remaining() == 0 && bytesNeededToFinish > 0)) {
+      try (TraceScope ignored = tracer.newScope(
+          "BlockReaderRemote#readNextPacket(" + blockId + ")")) {
+        readNextPacket();
       }
-      nSkipped += ret;
     }
-    return nSkipped;
-  }
+    if (curDataSlice.remaining() == 0) {
+      // we're at EOF now
+      return -1;
+    }
 
-  @Override
-  public int read() throws IOException {
-    throw new IOException("read() is not expected to be invoked. " +
-        "Use read(buf, off, len) instead.");
-  }
+    int nRead = Math.min(curDataSlice.remaining(), buf.remaining());
+    ByteBuffer writeSlice = curDataSlice.duplicate();
+    writeSlice.limit(writeSlice.position() + nRead);
+    buf.put(writeSlice);
+    curDataSlice.position(writeSlice.position());
 
-  @Override
-  public boolean seekToNewSource(long targetPos) throws IOException {
-    /* Checksum errors are handled outside the BlockReader.
-     * DFSInputStream does not always call 'seekToNewSource'. In the
-     * case of pread(), it just tries a different replica without seeking.
-     */
-    return false;
+    return nRead;
   }
 
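The duplicate()/limit() sequence in read(ByteBuffer) above is the standard idiom for a bounded buffer-to-buffer copy: the duplicate shares content with curDataSlice but carries its own position and limit, so the transfer can be capped at nRead bytes without disturbing the source buffer's limit. A minimal standalone sketch of the same idiom (class and method names here are illustrative, not part of the patch):

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

// BoundedCopySketch.java -- the bounded ByteBuffer copy used by
// read(ByteBuffer) above, in isolation.
public class BoundedCopySketch {
  static int copy(ByteBuffer src, ByteBuffer dst) {
    int nRead = Math.min(src.remaining(), dst.remaining());
    ByteBuffer writeSlice = src.duplicate();
    writeSlice.limit(writeSlice.position() + nRead); // cap the transfer
    dst.put(writeSlice);
    src.position(writeSlice.position()); // advance src past the copied bytes
    return nRead;
  }

  public static void main(String[] args) {
    ByteBuffer src = ByteBuffer.wrap("hello world".getBytes(StandardCharsets.UTF_8));
    ByteBuffer dst = ByteBuffer.allocate(5);
    System.out.println(copy(src, dst)); // 5 -- capped by dst's capacity
  }
}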
-  @Override
-  public void seek(long pos) throws IOException {
-    throw new IOException("Seek() is not supported in BlockInputChecker");
-  }
+  private void readNextPacket() throws IOException {
+    // Read packet headers.
+    packetReceiver.receiveNextPacket(in);
 
-  @Override
-  protected long getChunkPosition(long pos) {
-    throw new RuntimeException("getChunkPosition() is not supported, " +
-        "since seek is not required");
-  }
+    PacketHeader curHeader = packetReceiver.getHeader();
+    curDataSlice = packetReceiver.getDataSlice();
+    assert curDataSlice.capacity() == curHeader.getDataLen();
 
-  /**
-   * Makes sure that checksumBytes has enough capacity
-   * and limit is set to the number of checksum bytes needed
-   * to be read.
-   */
-  private void adjustChecksumBytes(int dataLen) {
-    int requiredSize =
-        ((dataLen + bytesPerChecksum - 1)/bytesPerChecksum)*checksumSize;
-    if (checksumBytes == null || requiredSize > checksumBytes.capacity()) {
-      checksumBytes = ByteBuffer.wrap(new byte[requiredSize]);
-    } else {
-      checksumBytes.clear();
-    }
-    checksumBytes.limit(requiredSize);
-  }
+    LOG.trace("DFSClient readNextPacket got header {}", curHeader);
 
-  @Override
-  protected synchronized int readChunk(long pos, byte[] buf, int offset,
-      int len, byte[] checksumBuf)
-      throws IOException {
-    try (TraceScope ignored = tracer.newScope(
-        "BlockReaderRemote#readChunk(" + blockId + ")")) {
-      return readChunkImpl(pos, buf, offset, len, checksumBuf);
+    // Sanity check the lengths
+    if (!curHeader.sanityCheck(lastSeqNo)) {
+      throw new IOException("BlockReader: error in packet header " +
+          curHeader);
     }
-  }
 
-  private synchronized int readChunkImpl(long pos, byte[] buf, int offset,
-      int len, byte[] checksumBuf)
-      throws IOException {
-    // Read one chunk.
-    if (eos) {
-      // Already hit EOF
-      return -1;
+    if (curHeader.getDataLen() > 0) {
+      int chunks = 1 + (curHeader.getDataLen() - 1) / bytesPerChecksum;
+      int checksumsLen = chunks * checksumSize;
+
+      assert packetReceiver.getChecksumSlice().capacity() == checksumsLen :
+          "checksum slice capacity=" +
+              packetReceiver.getChecksumSlice().capacity() +
+          " checksumsLen=" + checksumsLen;
+
+      lastSeqNo = curHeader.getSeqno();
+      if (verifyChecksum && curDataSlice.remaining() > 0) {
+        // N.B.: the checksum error offset reported here is actually
+        // relative to the start of the block, not the start of the file.
+        // This is slightly misleading, but preserves the behavior from
+        // the older BlockReader.
+        checksum.verifyChunkedSums(curDataSlice,
+            packetReceiver.getChecksumSlice(),
+            filename, curHeader.getOffsetInBlock());
+      }
+      bytesNeededToFinish -= curHeader.getDataLen();
    }
 
-    // Read one DATA_CHUNK.
-    long chunkOffset = lastChunkOffset;
-    if ( lastChunkLen > 0 ) {
-      chunkOffset += lastChunkLen;
+    // First packet will include some data prior to the first byte
+    // the user requested. Skip it.
+    if (curHeader.getOffsetInBlock() < startOffset) {
+      int newPos = (int) (startOffset - curHeader.getOffsetInBlock());
+      curDataSlice.position(newPos);
     }
 
-    // pos is relative to the start of the first chunk of the read.
-    // chunkOffset is relative to the start of the block.
-    // This makes sure that the read passed from FSInputChecker is the
-    // for the same chunk we expect to be reading from the DN.
-    if ( (pos + firstChunkOffset) != chunkOffset ) {
-      throw new IOException("Mismatch in pos : " + pos + " + " +
-          firstChunkOffset + " != " + chunkOffset);
+    // If we've now satisfied the whole client read, read one last packet
+    // header, which should be empty
+    if (bytesNeededToFinish <= 0) {
+      readTrailingEmptyPacket();
+      if (verifyChecksum) {
+        sendReadResult(Status.CHECKSUM_OK);
+      } else {
+        sendReadResult(Status.SUCCESS);
+      }
     }
+  }
 
-    // Read next packet if the previous packet has been read completely.
-    if (dataLeft <= 0) {
-      //Read packet headers.
-      PacketHeader header = new PacketHeader();
-      header.readFields(in);
-
-      LOG.debug("DFSClient readChunk got header {}", header);
-
-      // Sanity check the lengths
-      if (!header.sanityCheck(lastSeqNo)) {
-        throw new IOException("BlockReader: error in packet header " +
-            header);
+  @Override
+  public synchronized long skip(long n) throws IOException {
+    /* How can we make sure we don't throw a ChecksumException, at least
+     * in the majority of cases? This one throws. */
+    long skipped = 0;
+    while (skipped < n) {
+      long needToSkip = n - skipped;
+      if (curDataSlice == null ||
+          (curDataSlice.remaining() == 0 && bytesNeededToFinish > 0)) {
+        readNextPacket();
       }
-
-      lastSeqNo = header.getSeqno();
-      dataLeft = header.getDataLen();
-      adjustChecksumBytes(header.getDataLen());
-      if (header.getDataLen() > 0) {
-        IOUtils.readFully(in, checksumBytes.array(), 0,
-            checksumBytes.limit());
+      if (curDataSlice.remaining() == 0) {
+        // we're at EOF now
+        break;
       }
-    }
-
-    // Sanity checks
-    assert len >= bytesPerChecksum;
-    assert checksum != null;
-    assert checksumSize == 0 || (checksumBuf.length % checksumSize == 0);
-
-
-    int checksumsToRead, bytesToRead;
 
-    if (checksumSize > 0) {
-
-      // How many chunks left in our packet - this is a ceiling
-      // since we may have a partial chunk at the end of the file
-      int chunksLeft = (dataLeft - 1) / bytesPerChecksum + 1;
-
-      // How many chunks we can fit in databuffer
-      // - note this is a floor since we always read full chunks
-      int chunksCanFit = Math.min(len / bytesPerChecksum,
-          checksumBuf.length / checksumSize);
-
-      // How many chunks should we read
-      checksumsToRead = Math.min(chunksLeft, chunksCanFit);
-      // How many bytes should we actually read
-      bytesToRead = Math.min(
-          checksumsToRead * bytesPerChecksum, // full chunks
-          dataLeft); // in case we have a partial
-    } else {
-      // no checksum
-      bytesToRead = Math.min(dataLeft, len);
-      checksumsToRead = 0;
-    }
-
-    if ( bytesToRead > 0 ) {
-      // Assert we have enough space
-      assert bytesToRead <= len;
-      assert checksumBytes.remaining() >= checksumSize * checksumsToRead;
-      assert checksumBuf.length >= checksumSize * checksumsToRead;
-      IOUtils.readFully(in, buf, offset, bytesToRead);
-      checksumBytes.get(checksumBuf, 0, checksumSize * checksumsToRead);
+      int skip = (int) Math.min(curDataSlice.remaining(), needToSkip);
+      curDataSlice.position(curDataSlice.position() + skip);
+      skipped += skip;
     }
+    return skipped;
+  }
 
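read(byte[], ...), read(ByteBuffer) and skip() above all share one refill test: fetch the next packet only when curDataSlice is drained and bytes are still owed, and treat a slice that is still empty after a refill as EOF. The following self-contained simulation isolates that drain-and-refill pattern with packet I/O replaced by an in-memory queue; every name in it is illustrative, not part of the class's API:

import java.nio.ByteBuffer;
import java.util.ArrayDeque;
import java.util.Deque;

// DrainRefillSketch.java -- the drain-and-refill control flow of skip(),
// with "packets" simulated as fixed-size in-memory buffers.
public class DrainRefillSketch {
  private final Deque<ByteBuffer> packets = new ArrayDeque<>();
  private ByteBuffer curDataSlice = null;
  private long bytesNeededToFinish;

  DrainRefillSketch(long total) {
    bytesNeededToFinish = total;
    packets.add(ByteBuffer.allocate(4)); // two simulated 4-byte packets
    packets.add(ByteBuffer.allocate(4));
  }

  private void readNextPacket() {
    curDataSlice = packets.isEmpty() ? ByteBuffer.allocate(0) : packets.poll();
    bytesNeededToFinish -= curDataSlice.remaining();
  }

  long skip(long n) {
    long skipped = 0;
    while (skipped < n) {
      if (curDataSlice == null ||
          (curDataSlice.remaining() == 0 && bytesNeededToFinish > 0)) {
        readNextPacket(); // refill only when drained and bytes are owed
      }
      if (curDataSlice.remaining() == 0) {
        break; // EOF: nothing buffered and nothing owed
      }
      int step = (int) Math.min(curDataSlice.remaining(), n - skipped);
      curDataSlice.position(curDataSlice.position() + step); // reposition only
      skipped += step;
    }
    return skipped;
  }

  public static void main(String[] args) {
    System.out.println(new DrainRefillSketch(8).skip(10)); // 8
  }
}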
-    dataLeft -= bytesToRead;
-    assert dataLeft >= 0;
-
-    lastChunkOffset = chunkOffset;
-    lastChunkLen = bytesToRead;
-
-    // If there's no data left in the current packet after satisfying
-    // this read, and we have satisfied the client read, we expect
-    // an empty packet header from the DN to signify this.
-    // Note that pos + bytesToRead may in fact be greater since the
-    // DN finishes off the entire last chunk.
-    if (dataLeft == 0 &&
-        pos + bytesToRead >= bytesNeededToFinish) {
-
-      // Read header
-      PacketHeader hdr = new PacketHeader();
-      hdr.readFields(in);
+  private void readTrailingEmptyPacket() throws IOException {
+    LOG.trace("Reading empty packet at end of read");
 
-      if (!hdr.isLastPacketInBlock() ||
-          hdr.getDataLen() != 0) {
-        throw new IOException("Expected empty end-of-read packet! Header: " +
-            hdr);
-      }
+    packetReceiver.receiveNextPacket(in);
 
-      eos = true;
+    PacketHeader trailer = packetReceiver.getHeader();
+    if (!trailer.isLastPacketInBlock() ||
+        trailer.getDataLen() != 0) {
+      throw new IOException("Expected empty end-of-read packet! Header: " +
+          trailer);
     }
-
-    if ( bytesToRead == 0 ) {
-      return -1;
-    }
-
-    return bytesToRead;
   }
 
-  private BlockReaderRemote(String file, String bpid, long blockId,
-      DataInputStream in, DataChecksum checksum, boolean verifyChecksum,
-      long startOffset, long firstChunkOffset, long bytesToRead, Peer peer,
-      DatanodeID datanodeID, PeerCache peerCache, Tracer tracer,
-      int networkDistance) {
+  protected BlockReaderRemote(String file, long blockId,
+      DataChecksum checksum, boolean verifyChecksum,
+      long startOffset, long firstChunkOffset,
+      long bytesToRead, Peer peer,
+      DatanodeID datanodeID, PeerCache peerCache,
+      Tracer tracer,
+      int networkDistance) {
     // Path is used only for printing block and file information in debug
-    super(new Path("/" + Block.BLOCK_FILE_PREFIX + blockId +
-        ":" + bpid + ":of:"+ file)/*too non path-like?*/,
-        1, verifyChecksum,
-        checksum.getChecksumSize() > 0? checksum : null,
-        checksum.getBytesPerChecksum(),
-        checksum.getChecksumSize());
-
     this.peer = peer;
     this.datanodeID = datanodeID;
-    this.in = in;
+    this.in = peer.getInputStreamChannel();
     this.checksum = checksum;
+    this.verifyChecksum = verifyChecksum;
     this.startOffset = Math.max( startOffset, 0 );
+    this.filename = file;
+    this.peerCache = peerCache;
     this.blockId = blockId;
 
     // The total number of bytes that we need to transfer from the DN is
@@ -362,18 +298,81 @@ public class BlockReaderRemote extends FSInputChecker implements BlockReader {
     // the beginning in order to chunk-align. Note that the DN may elect
     // to send more than this amount if the read starts/ends mid-chunk.
     this.bytesNeededToFinish = bytesToRead + (startOffset - firstChunkOffset);
-
-    this.firstChunkOffset = firstChunkOffset;
-    lastChunkOffset = firstChunkOffset;
-    lastChunkLen = -1;
-
     bytesPerChecksum = this.checksum.getBytesPerChecksum();
     checksumSize = this.checksum.getChecksumSize();
-    this.peerCache = peerCache;
     this.tracer = tracer;
     this.networkDistance = networkDistance;
   }
 
+
+  @Override
+  public synchronized void close() throws IOException {
+    packetReceiver.close();
+    startOffset = -1;
+    checksum = null;
+    if (peerCache != null && sentStatusCode) {
+      peerCache.put(datanodeID, peer);
+    } else {
+      peer.close();
+    }
+
+    // in will be closed when its Socket is closed.
+  }
+
+  /**
+   * When the reader reaches the end of the read, it sends a status response
+   * (e.g. CHECKSUM_OK) to the DN. Failure to do so could lead to the DN
+   * closing our connection (which we will re-open), but won't affect
+   * data correctness.
+   */
+  void sendReadResult(Status statusCode) {
+    assert !sentStatusCode : "already sent status code to " + peer;
+    try {
+      writeReadResult(peer.getOutputStream(), statusCode);
+      sentStatusCode = true;
+    } catch (IOException e) {
+      // It's ok not to be able to send this. But something is probably wrong.
+      LOG.info("Could not send read status (" + statusCode + ") to datanode " +
+          peer.getRemoteAddressString() + ": " + e.getMessage());
+    }
+  }
+
+  /**
+   * Serialize the actual read result on the wire.
+   */
+  static void writeReadResult(OutputStream out, Status statusCode)
+      throws IOException {
+
+    ClientReadStatusProto.newBuilder()
+        .setStatus(statusCode)
+        .build()
+        .writeDelimitedTo(out);
+
+    out.flush();
+  }
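writeReadResult() above relies on protobuf's varint-delimited framing: writeDelimitedTo() prefixes the message with its encoded length, which is what lets the datanode frame the status off a raw socket stream. A round-trip sketch, simulated in memory and assuming the standard generated counterpart parseDelimitedFrom() (this sketch is not part of the patch):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.ClientReadStatusProto;
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status;

// ReadStatusFramingSketch.java -- write a length-delimited status message and
// read it back, mimicking both ends of the client/datanode exchange.
public class ReadStatusFramingSketch {
  public static void main(String[] args) throws Exception {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    ClientReadStatusProto.newBuilder()
        .setStatus(Status.CHECKSUM_OK)
        .build()
        .writeDelimitedTo(out); // varint length prefix, then the message

    ClientReadStatusProto echoed = ClientReadStatusProto.parseDelimitedFrom(
        new ByteArrayInputStream(out.toByteArray()));
    System.out.println(echoed.getStatus()); // CHECKSUM_OK
  }
}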
+
+  /**
+   * File name to print when accessing a block directly (from servlets)
+   * @param s Address of the block location
+   * @param poolId Block pool ID of the block
+   * @param blockId Block ID of the block
+   * @return string that has a file name for debug purposes
+   */
+  public static String getFileName(final InetSocketAddress s,
+      final String poolId, final long blockId) {
+    return s.toString() + ":" + poolId + ":" + blockId;
+  }
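For a concrete sense of the debug name getFileName() builds, a tiny usage sketch; the host, port, pool id and block id below are made up (9866 is a common datanode transfer port), and the address portion is whatever InetSocketAddress.toString() yields on the running JDK:

import java.net.InetSocketAddress;

// GetFileNameSketch.java -- the same concatenation as getFileName(), with
// hypothetical values.
public class GetFileNameSketch {
  public static void main(String[] args) {
    InetSocketAddress s =
        InetSocketAddress.createUnresolved("dn1.example.com", 9866);
    String poolId = "BP-1234567-10.0.0.1-1400000000000";
    long blockId = 1073741825L;
    // Prints the socket address followed by ":<poolId>:<blockId>"; the exact
    // address formatting for unresolved addresses varies across JDK versions.
    System.out.println(s.toString() + ":" + poolId + ":" + blockId);
  }
}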
+
+  @Override
+  public int readAll(byte[] buf, int offset, int len) throws IOException {
+    return BlockReaderUtil.readAll(this, buf, offset, len);
+  }
+
+  @Override
+  public void readFully(byte[] buf, int off, int len) throws IOException {
+    BlockReaderUtil.readFully(this, buf, off, len);
+  }
+
   /**
    * Create a new BlockReader specifically to satisfy a read.
    * This method also sends the OP_READ_BLOCK request.
@@ -383,38 +382,37 @@ public class BlockReaderRemote extends FSInputChecker implements BlockReader {
    * @param blockToken The block token for security
    * @param startOffset The read offset, relative to block head
    * @param len The number of bytes to read
-   * @param bufferSize The IO buffer size (not the client buffer size)
    * @param verifyChecksum Whether to verify checksum
    * @param clientName Client name
+   * @param peer The Peer to use
+   * @param datanodeID The DatanodeID this peer is connected to
    * @return New BlockReader instance, or null on error.
    */
-  public static BlockReaderRemote newBlockReader(String file,
+  public static BlockReader newBlockReader(String file,
       ExtendedBlock block,
       Token<BlockTokenIdentifier> blockToken,
       long startOffset, long len,
-      int bufferSize, boolean verifyChecksum,
-      String clientName, Peer peer,
-      DatanodeID datanodeID,
+      boolean verifyChecksum,
+      String clientName,
+      Peer peer, DatanodeID datanodeID,
      PeerCache peerCache,
       CachingStrategy cachingStrategy,
-      Tracer tracer, int networkDistance)
-      throws IOException {
+      Tracer tracer,
+      int networkDistance) throws IOException {
     // in and out will be closed when sock is closed (by the caller)
-    final DataOutputStream out =
-        new DataOutputStream(new BufferedOutputStream(peer.getOutputStream()));
+    final DataOutputStream out = new DataOutputStream(new BufferedOutputStream(
+        peer.getOutputStream()));
     new Sender(out).readBlock(block, blockToken, clientName, startOffset, len,
         verifyChecksum, cachingStrategy);
 
     //
-    // Get bytes in block, set streams
+    // Get bytes in block
     //
-
-    DataInputStream in = new DataInputStream(
-        new BufferedInputStream(peer.getInputStream(), bufferSize));
+    DataInputStream in = new DataInputStream(peer.getInputStream());
 
     BlockOpResponseProto status = BlockOpResponseProto.parseFrom(
         PBHelperClient.vintPrefixed(in));
-    BlockReaderRemote2.checkSuccess(status, peer, block, file);
+    checkSuccess(status, peer, block, file);
     ReadOpChecksumInfoProto checksumInfo =
         status.getReadOpChecksumInfo();
     DataChecksum checksum = DataTransferProtoUtil.fromProto(
@@ -431,63 +429,29 @@ public class BlockReaderRemote extends FSInputChecker implements BlockReader {
         startOffset + " for file " + file);
     }
 
-    return new BlockReaderRemote(file, block.getBlockPoolId(), block.getBlockId(),
-        in, checksum, verifyChecksum, startOffset, firstChunkOffset, len,
-        peer, datanodeID, peerCache, tracer, networkDistance);
+    return new BlockReaderRemote(file, block.getBlockId(), checksum,
+        verifyChecksum, startOffset, firstChunkOffset, len, peer, datanodeID,
+        peerCache, tracer, networkDistance);
   }
 
-  @Override
-  public synchronized void close() throws IOException {
-    startOffset = -1;
-    checksum = null;
-    if (peerCache != null & sentStatusCode) {
-      peerCache.put(datanodeID, peer);
-    } else {
-      peer.close();
-    }
-
-    // in will be closed when its Socket is closed.
-  }
-
-  @Override
-  public void readFully(byte[] buf, int readOffset, int amtToRead)
+  static void checkSuccess(
+      BlockOpResponseProto status, Peer peer,
+      ExtendedBlock block, String file)
       throws IOException {
-    IOUtils.readFully(this, buf, readOffset, amtToRead);
-  }
-
-  @Override
-  public int readAll(byte[] buf, int offset, int len) throws IOException {
-    return readFully(this, buf, offset, len);
-  }
-
-  /**
-   * When the reader reaches end of the read, it sends a status response
-   * (e.g. CHECKSUM_OK) to the DN. Failure to do so could lead to the DN
-   * closing our connection (which we will re-open), but won't affect
-   * data correctness.
-   */
-  void sendReadResult(Peer peer, Status statusCode) {
-    assert !sentStatusCode : "already sent status code to " + peer;
-    try {
-      BlockReaderRemote2.writeReadResult(peer.getOutputStream(), statusCode);
-      sentStatusCode = true;
-    } catch (IOException e) {
-      // It's ok not to be able to send this. But something is probably wrong.
-      LOG.info("Could not send read status (" + statusCode + ") to datanode " +
-          peer.getRemoteAddressString() + ": " + e.getMessage());
-    }
-  }
-
-  @Override
-  public int read(ByteBuffer buf) throws IOException {
-    throw new UnsupportedOperationException("readDirect unsupported in BlockReaderRemote");
+    String logInfo = "for OP_READ_BLOCK"
+        + ", self=" + peer.getLocalAddressString()
+        + ", remote=" + peer.getRemoteAddressString()
+        + ", for file " + file
+        + ", for pool " + block.getBlockPoolId()
+        + " block " + block.getBlockId() + "_" + block.getGenerationStamp();
+    DataTransferProtoUtil.checkBlockOpStatus(status, logInfo);
   }
 
   @Override
   public int available() {
     // An optimistic estimate of how much data is available
     // to us without doing network I/O.
-    return BlockReaderRemote2.TCP_WINDOW_SIZE;
+    return TCP_WINDOW_SIZE;
   }
 
   @Override