@@ -40,7 +40,6 @@ import org.slf4j.LoggerFactory;

 import java.io.IOException;
 import java.io.OutputStream;
-import java.nio.Buffer;
 import java.nio.ByteBuffer;
 import java.util.Collections;
 import java.util.UUID;
@@ -87,7 +86,7 @@ public class BlockOutputStream extends OutputStream {
   private final long streamBufferFlushSize;
   private final long streamBufferMaxSize;
   private final long watchTimeout;
-  private List<ByteBuffer> bufferList;
+  private BufferPool bufferPool;
   // The IOException will be set by response handling thread in case there is an
   // exception received in the response. If the exception is set, the next
   // request will fail upfront.
@@ -111,8 +110,6 @@ public class BlockOutputStream extends OutputStream {
   // map containing mapping for putBlock logIndex to to flushedDataLength Map.
   private ConcurrentHashMap<Long, Long> commitIndex2flushedDataMap;

-  private int currentBufferIndex;
-
   private List<DatanodeDetails> failedServers;

   /**
@@ -124,7 +121,7 @@ public class BlockOutputStream extends OutputStream {
    * @param pipeline pipeline where block will be written
    * @param traceID container protocol call args
    * @param chunkSize chunk size
-   * @param bufferList list of byte buffers
+   * @param bufferPool pool of buffers
    * @param streamBufferFlushSize flush size
    * @param streamBufferMaxSize max size of the currentBuffer
    * @param watchTimeout watch timeout
@@ -135,7 +132,7 @@ public class BlockOutputStream extends OutputStream {
   public BlockOutputStream(BlockID blockID, String key,
       XceiverClientManager xceiverClientManager, Pipeline pipeline,
       String traceID, int chunkSize, long streamBufferFlushSize,
-      long streamBufferMaxSize, long watchTimeout, List<ByteBuffer> bufferList,
+      long streamBufferMaxSize, long watchTimeout, BufferPool bufferPool,
       ChecksumType checksumType, int bytesPerChecksum)
       throws IOException {
     this.blockID = blockID;
@@ -154,7 +151,7 @@ public class BlockOutputStream extends OutputStream {
     this.streamBufferFlushSize = streamBufferFlushSize;
     this.streamBufferMaxSize = streamBufferMaxSize;
     this.watchTimeout = watchTimeout;
-    this.bufferList = bufferList;
+    this.bufferPool = bufferPool;
     this.checksumType = checksumType;
     this.bytesPerChecksum = bytesPerChecksum;

@@ -164,7 +161,6 @@ public class BlockOutputStream extends OutputStream {
     totalAckDataLength = 0;
     futureMap = new ConcurrentHashMap<>();
     totalDataFlushedLength = 0;
-    currentBufferIndex = 0;
     writtenDataLength = 0;
     failedServers = Collections.emptyList();
   }
@@ -181,13 +177,6 @@ public class BlockOutputStream extends OutputStream {
     return writtenDataLength;
   }

-  private long computeBufferData() {
-    int dataLength =
-        bufferList.stream().mapToInt(Buffer::position).sum();
-    Preconditions.checkState(dataLength <= streamBufferMaxSize);
-    return dataLength;
-  }
-
   public List<DatanodeDetails> getFailedServers() {
     return failedServers;
   }
@@ -202,6 +191,7 @@ public class BlockOutputStream extends OutputStream {

   @Override
   public void write(byte[] b, int off, int len) throws IOException {
+    checkOpen();
     if (b == null) {
       throw new NullPointerException();
     }
@@ -213,53 +203,40 @@
       return;
     }
     while (len > 0) {
-      checkOpen();
       int writeLen;
-      allocateBuffer();
-      ByteBuffer currentBuffer = getCurrentBuffer();
+
+      // Allocate a buffer if needed. The buffer will be allocated only
+      // once as needed and will be reused again for multiple blockOutputStream
+      // entries.
+      ByteBuffer currentBuffer = bufferPool.allocateBufferIfNeeded();
+      int pos = currentBuffer.position();
       writeLen =
-          Math.min(chunkSize - currentBuffer.position() % chunkSize, len);
+          Math.min(chunkSize - pos % chunkSize, len);
       currentBuffer.put(b, off, writeLen);
-      if (currentBuffer.position() % chunkSize == 0) {
-        int pos = currentBuffer.position() - chunkSize;
-        int limit = currentBuffer.position();
-        writeChunk(pos, limit);
+      if (!currentBuffer.hasRemaining()) {
+        writeChunk(currentBuffer);
       }
       off += writeLen;
       len -= writeLen;
       writtenDataLength += writeLen;
-      if (currentBuffer.position() == streamBufferFlushSize) {
+      if (shouldFlush()) {
         totalDataFlushedLength += streamBufferFlushSize;
         handlePartialFlush();
       }
-      long bufferedData = computeBufferData();
-      // Data in the bufferList can not exceed streamBufferMaxSize
-      if (bufferedData == streamBufferMaxSize) {
+      // Data in the bufferPool can not exceed streamBufferMaxSize
+      if (isBufferPoolFull()) {
        handleFullBuffer();
      }
    }
  }

-  private ByteBuffer getCurrentBuffer() {
-    ByteBuffer buffer = bufferList.get(currentBufferIndex);
-    if (!buffer.hasRemaining()) {
-      currentBufferIndex =
-          currentBufferIndex < getMaxNumBuffers() - 1 ? ++currentBufferIndex :
-              0;
-    }
-    return bufferList.get(currentBufferIndex);
-  }
-
-  private int getMaxNumBuffers() {
-    return (int)(streamBufferMaxSize/streamBufferFlushSize);
+  private boolean shouldFlush() {
+    return writtenDataLength % streamBufferFlushSize == 0;
   }

-  private void allocateBuffer() {
-    for (int i = bufferList.size(); i < getMaxNumBuffers(); i++) {
-      bufferList.add(ByteBuffer.allocate((int)streamBufferFlushSize));
-    }
+  private boolean isBufferPoolFull() {
+    return bufferPool.computeBufferData() == streamBufferMaxSize;
   }
-
   /**
    * Will be called on the retryPath in case closedContainerException/
    * TimeoutException.
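Note: the `BufferPool` class this patch starts depending on is defined outside this diff. The sketch below reconstructs, from the call sites above (`allocateBufferIfNeeded()`, `getBuffer()`, `getBuffer(int)`, `releaseBuffer()`, `computeBufferData()`, `getSize()`), what such a pool might look like. It is an illustrative assumption, not the actual hadoop-hdds implementation; the field names and recycling scheme are guesses.

```java
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;

/**
 * Illustrative sketch only, inferred from the call sites in this diff:
 * a pool of chunk-sized buffers, allocated lazily up to a fixed capacity
 * and recycled once their contents have been acknowledged.
 */
public class BufferPool {

  private final List<ByteBuffer> bufferList = new ArrayList<>();
  private final int bufferSize;   // one chunk
  private final int capacity;     // streamBufferMaxSize / chunkSize
  private int currentBufferIndex = -1;

  public BufferPool(int bufferSize, int capacity) {
    this.bufferSize = bufferSize;
    this.capacity = capacity;
  }

  /**
   * Returns the buffer currently being written to; once it is full,
   * allocates a new one (until the pool hits capacity) or advances to
   * the next recycled buffer. The caller must release buffers before
   * the index wraps around, which the full-buffer handling guarantees.
   */
  public ByteBuffer allocateBufferIfNeeded() {
    ByteBuffer current = getBuffer();
    if (current != null && current.hasRemaining()) {
      return current;
    }
    if (bufferList.size() < capacity) {
      bufferList.add(ByteBuffer.allocate(bufferSize));
    }
    currentBufferIndex = (currentBufferIndex + 1) % capacity;
    return bufferList.get(currentBufferIndex);
  }

  // Buffer being written to right now, or null before the first write.
  public ByteBuffer getBuffer() {
    return currentBufferIndex < 0 ? null : bufferList.get(currentBufferIndex);
  }

  // Used on the retry path to replay already-cached chunks by index.
  public ByteBuffer getBuffer(int index) {
    return bufferList.get(index);
  }

  // Oldest buffer has been acknowledged: clear it and queue it for reuse.
  public void releaseBuffer() {
    ByteBuffer buffer = bufferList.remove(0);
    buffer.clear();
    bufferList.add(buffer);
    currentBufferIndex--;
  }

  // Total un-acknowledged bytes cached across the pool.
  public long computeBufferData() {
    long total = 0;
    for (ByteBuffer buffer : bufferList) {
      total += buffer.position();
    }
    return total;
  }

  public int getSize() {
    return bufferList.size();
  }
}
```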
@@ -272,36 +249,37 @@
     if (len == 0) {
       return;
     }
-    int off = 0;
-    int pos = off;
+    int count = 0;
+    Preconditions.checkArgument(len <= streamBufferMaxSize);
     while (len > 0) {
       long writeLen;
       writeLen = Math.min(chunkSize, len);
       if (writeLen == chunkSize) {
-        int limit = pos + chunkSize;
-        writeChunk(pos, limit);
+        writeChunk(bufferPool.getBuffer(count));
       }
-      off += writeLen;
       len -= writeLen;
+      count++;
       writtenDataLength += writeLen;
-      if (off % streamBufferFlushSize == 0) {
-        // reset the position to zero as now we wll readng thhe next buffer in
-        // the list
-        pos = 0;
+      if (shouldFlush()) {
+        // reset the position to zero as now we will be reading the
+        // next buffer in the list
         totalDataFlushedLength += streamBufferFlushSize;
         handlePartialFlush();
       }
-      if (computeBufferData() % streamBufferMaxSize == 0) {
+
+      // we should not call isBufferPoolFull here. The buffer might already
+      // be full, as the whole data is already cached in the buffer. We
+      // should just validate that we wrote data of size streamBufferMaxSize
+      // before invoking the full buffer handling.
+      if (writtenDataLength == streamBufferMaxSize) {
        handleFullBuffer();
      }
    }
  }

  /**
-   * just update the totalAckDataLength. Since we have allocated
-   * the currentBuffer more than the streamBufferMaxSize, we can keep on writing
-   * to the currentBuffer. In case of failure, we will read the data starting
-   * from totalAckDataLength.
+   * just update the totalAckDataLength. In case of failure,
+   * we will read the data starting from totalAckDataLength.
    */
   private void updateFlushIndex(long index) {
     if (!commitIndex2flushedDataMap.isEmpty()) {
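As a sanity check on the retry-path arithmetic, the following standalone walk-through replays cached data under assumed, illustrative settings (chunkSize = 1 MB, streamBufferFlushSize = 4 MB, streamBufferMaxSize = 8 MB; these are example values, not Ozone defaults). It mirrors the loop above: one chunk re-sent per iteration, a partial flush every fourth chunk, and the full-buffer wait only once all 8 MB have been replayed.

```java
/** Standalone walk-through of the writeOnRetry bookkeeping (assumed values). */
public class RetryCadenceExample {
  public static void main(String[] args) {
    final long chunkSize = 1L << 20;              // 1 MB, assumption
    final long streamBufferFlushSize = 4L << 20;  // 4 MB, assumption
    final long streamBufferMaxSize = 8L << 20;    // 8 MB, assumption

    long len = streamBufferMaxSize;  // all cached data is replayed
    long writtenDataLength = 0;
    int count = 0;                   // pool index of the next chunk

    while (len > 0) {
      long writeLen = Math.min(chunkSize, len);
      // here the real code re-sends bufferPool.getBuffer(count)
      len -= writeLen;
      count++;
      writtenDataLength += writeLen;
      if (writtenDataLength % streamBufferFlushSize == 0) {
        System.out.println("putBlock after chunk " + count); // chunks 4 and 8
      }
      if (writtenDataLength == streamBufferMaxSize) {
        System.out.println("full buffer: wait for acks");    // final chunk only
      }
    }
    System.out.println(count + " chunks replayed");          // 8
  }
}
```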
@@ -310,13 +288,15 @@
       LOG.debug("Total data successfully replicated: " + totalAckDataLength);
       futureMap.remove(totalAckDataLength);
       // Flush has been committed to required servers successful.
-      // just swap the bufferList head and tail after clearing.
-      ByteBuffer currentBuffer = bufferList.remove(0);
-      currentBuffer.clear();
-      if (currentBufferIndex != 0) {
-        currentBufferIndex--;
+      // just release the current buffer from the buffer pool.
+
+      // every entry removed from the putBlock future Map signifies
+      // streamBufferFlushSize/chunkSize number of chunks successfully
+      // committed. Release the buffers from the buffer pool for reuse.
+      int chunkCount = (int) (streamBufferFlushSize / chunkSize);
+      for (int i = 0; i < chunkCount; i++) {
+        bufferPool.releaseBuffer();
       }
-      bufferList.add(currentBuffer);
     }
   }

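The release loop above returns one chunk-sized buffer to the pool per chunk covered by the acknowledged putBlock. With the same assumed example values, each acknowledgement recycles four buffers:

```java
/** Buffers recycled per putBlock acknowledgement (assumed example values). */
public class ReleaseCountExample {
  public static void main(String[] args) {
    long chunkSize = 1L << 20;              // 1 MB, assumption
    long streamBufferFlushSize = 4L << 20;  // 4 MB, assumption
    // Mirrors the chunkCount computation in updateFlushIndex above.
    int chunkCount = (int) (streamBufferFlushSize / chunkSize);
    System.out.println("buffers released per ack = " + chunkCount); // 4
  }
}
```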
@@ -450,91 +430,85 @@
   @Override
   public void flush() throws IOException {
     if (xceiverClientManager != null && xceiverClient != null
-        && bufferList != null) {
-      checkOpen();
-      int bufferSize = bufferList.size();
-      if (bufferSize > 0) {
-        try {
-          // flush the last chunk data residing on the currentBuffer
-          if (totalDataFlushedLength < writtenDataLength) {
-            ByteBuffer currentBuffer = getCurrentBuffer();
-            int pos = currentBuffer.position() - (currentBuffer.position()
-                % chunkSize);
-            int limit = currentBuffer.position() - pos;
-            writeChunk(pos, currentBuffer.position());
-            totalDataFlushedLength += limit;
-            handlePartialFlush();
-          }
-          waitOnFlushFutures();
-          // just check again if the exception is hit while waiting for the
-          // futures to ensure flush has indeed succeeded
-          checkOpen();
-        } catch (InterruptedException | ExecutionException e) {
-          adjustBuffersOnException();
-          throw new IOException(
-              "Unexpected Storage Container Exception: " + e.toString(), e);
-        }
+        && bufferPool != null && bufferPool.getSize() > 0) {
+      try {
+        handleFlush();
+      } catch (InterruptedException | ExecutionException e) {
+        adjustBuffersOnException();
+        throw new IOException(
+            "Unexpected Storage Container Exception: " + e.toString(), e);
       }
     }
   }

-  private void writeChunk(int pos, int limit) throws IOException {
+
+  private void writeChunk(ByteBuffer buffer)
+      throws IOException {
     // Please note : We are not flipping the slice when we write since
     // the slices are pointing the currentBuffer start and end as needed for
     // the chunk write. Also please note, Duplicate does not create a
     // copy of data, it only creates metadata that points to the data
     // stream.
-    ByteBuffer chunk = bufferList.get(currentBufferIndex).duplicate();
-    chunk.position(pos);
-    chunk.limit(limit);
+    ByteBuffer chunk = buffer.duplicate();
+    chunk.position(0);
+    chunk.limit(buffer.position());
     writeChunkToContainer(chunk);
   }

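The comment in writeChunk leans on a ByteBuffer property worth spelling out: duplicate() shares the backing data but carries its own position and limit, so the chunk write gets a zero-copy read window without disturbing the writer's cursor. A small self-contained demonstration of that behavior:

```java
import java.nio.ByteBuffer;

/** Demonstrates the zero-copy duplicate() trick used by writeChunk. */
public class DuplicateDemo {
  public static void main(String[] args) {
    ByteBuffer buffer = ByteBuffer.allocate(8);
    buffer.put(new byte[] {1, 2, 3, 4});    // writer position is now 4

    // Same backing array, independent cursors -- no data is copied.
    ByteBuffer chunk = buffer.duplicate();
    chunk.position(0);
    chunk.limit(buffer.position());         // read window over bytes 0..3

    System.out.println(chunk.remaining());  // 4: the bytes to ship out
    System.out.println(buffer.position());  // 4: writer cursor untouched

    buffer.put((byte) 5);                   // later writes land in shared data
    System.out.println(buffer.position());  // 5
  }
}
```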
+  private void handleFlush()
+      throws IOException, InterruptedException, ExecutionException {
+    checkOpen();
+    // flush the last chunk data residing on the currentBuffer
+    if (totalDataFlushedLength < writtenDataLength) {
+      ByteBuffer currentBuffer = bufferPool.getBuffer();
+      int pos = currentBuffer.position();
+      writeChunk(currentBuffer);
+      totalDataFlushedLength += pos;
+      handlePartialFlush();
+    }
+    waitOnFlushFutures();
+    // just check again if the exception is hit while waiting for the
+    // futures to ensure flush has indeed succeeded
+
+    // irrespective of whether the commitIndex2flushedDataMap is empty
+    // or not, ensure there is no exception set
+    checkOpen();
+
+  }
+
   @Override
   public void close() throws IOException {
     if (xceiverClientManager != null && xceiverClient != null
-        && bufferList != null) {
-      int bufferSize = bufferList.size();
-      if (bufferSize > 0) {
-        try {
-          // flush the last chunk data residing on the currentBuffer
-          if (totalDataFlushedLength < writtenDataLength) {
-            ByteBuffer currentBuffer = getCurrentBuffer();
-            int pos = currentBuffer.position() - (currentBuffer.position()
-                % chunkSize);
-            int limit = currentBuffer.position() - pos;
-            writeChunk(pos, currentBuffer.position());
-            totalDataFlushedLength += limit;
-            handlePartialFlush();
-          }
-          waitOnFlushFutures();
-          // irrespective of whether the commitIndex2flushedDataMap is empty
-          // or not, ensure there is no exception set
-          checkOpen();
-          if (!commitIndex2flushedDataMap.isEmpty()) {
-            // wait for the last commit index in the commitIndex2flushedDataMap
-            // to get committed to all or majority of nodes in case timeout
-            // happens.
-            long lastIndex =
-                commitIndex2flushedDataMap.keySet().stream()
-                    .mapToLong(v -> v).max().getAsLong();
-            LOG.debug(
-                "waiting for last flush Index " + lastIndex + " to catch up");
-            watchForCommit(lastIndex);
-          }
-        } catch (InterruptedException | ExecutionException e) {
-          adjustBuffersOnException();
-          throw new IOException(
-              "Unexpected Storage Container Exception: " + e.toString(), e);
-        } finally {
-          cleanup(false);
+        && bufferPool != null && bufferPool.getSize() > 0) {
+      try {
+        handleFlush();
+        if (!commitIndex2flushedDataMap.isEmpty()) {
+          // wait for the last commit index in the commitIndex2flushedDataMap
+          // to get committed to all or majority of nodes in case timeout
+          // happens.
+          long lastIndex =
+              commitIndex2flushedDataMap.keySet().stream().mapToLong(v -> v)
+                  .max().getAsLong();
+          LOG.debug(
+              "waiting for last flush Index " + lastIndex + " to catch up");
+          watchForCommit(lastIndex);
         }
+      } catch (InterruptedException | ExecutionException e) {
+        adjustBuffersOnException();
+        throw new IOException(
+            "Unexpected Storage Container Exception: " + e.toString(), e);
+      } finally {
+        cleanup(false);
       }
-      // clear the currentBuffer
-      bufferList.stream().forEach(ByteBuffer::clear);
+      // TODO: Turn the below buffer empty check on when Standalone pipeline
+      // is removed in the write path in tests
+      // Preconditions.checkArgument(buffer.position() == 0);
+      // bufferPool.checkBufferPoolEmpty();
+
     }
   }

+
   private void waitOnFlushFutures()
       throws InterruptedException, ExecutionException {
     CompletableFuture<Void> combinedFuture = CompletableFuture.allOf(