|
@@ -18,22 +18,32 @@
|
|
|
|
|
|
package org.apache.hadoop.scm.storage;
|
|
|
|
|
|
-import static org.apache.hadoop.scm.storage.ContainerProtocolCalls.putKey;
|
|
|
-import static org.apache.hadoop.scm.storage.ContainerProtocolCalls.writeChunk;
|
|
|
-
|
|
|
-import java.io.IOException;
|
|
|
-import java.io.OutputStream;
|
|
|
-import java.nio.ByteBuffer;
|
|
|
-import java.util.UUID;
|
|
|
-
|
|
|
+import com.google.common.base.Preconditions;
|
|
|
import com.google.protobuf.ByteString;
|
|
|
-
|
|
|
import org.apache.commons.codec.digest.DigestUtils;
|
|
|
+import org.apache.commons.lang3.tuple.ImmutablePair;
|
|
|
+import org.apache.hadoop.hdfs.ozone.protocol.proto.ContainerProtos;
|
|
|
import org.apache.hadoop.hdfs.ozone.protocol.proto.ContainerProtos.ChunkInfo;
|
|
|
import org.apache.hadoop.hdfs.ozone.protocol.proto.ContainerProtos.KeyData;
|
|
|
import org.apache.hadoop.ozone.protocol.proto.OzoneProtos.KeyValue;
|
|
|
import org.apache.hadoop.scm.XceiverClientManager;
|
|
|
import org.apache.hadoop.scm.XceiverClientSpi;
|
|
|
+import org.apache.hadoop.util.Time;
|
|
|
+
|
|
|
+import java.io.IOException;
|
|
|
+import java.io.OutputStream;
|
|
|
+import java.nio.ByteBuffer;
|
|
|
+import java.util.LinkedList;
|
|
|
+import java.util.List;
|
|
|
+import java.util.UUID;
|
|
|
+import java.util.concurrent.CompletableFuture;
|
|
|
+import java.util.concurrent.ExecutionException;
|
|
|
+
|
|
|
+import static org.apache.hadoop.hdfs.ozone.protocol.proto.ContainerProtos
|
|
|
+ .Result.SUCCESS;
|
|
|
+import static org.apache.hadoop.scm.storage.ContainerProtocolCalls.putKey;
|
|
|
+import static org.apache.hadoop.scm.storage.ContainerProtocolCalls.writeChunk;
|
|
|
+
|
|
|
|
|
|
/**
|
|
|
* An {@link OutputStream} used by the REST service in combination with the
|
|
@@ -57,12 +67,12 @@ public class ChunkOutputStream extends OutputStream {
|
|
|
private final String key;
|
|
|
private final String traceID;
|
|
|
private final KeyData.Builder containerKeyData;
|
|
|
+ private final String streamId;
|
|
|
private XceiverClientManager xceiverClientManager;
|
|
|
private XceiverClientSpi xceiverClient;
|
|
|
private ByteBuffer buffer;
|
|
|
- private final String streamId;
|
|
|
- private int chunkIndex;
|
|
|
private int chunkSize;
|
|
|
+ private int streamBufferSize;
|
|
|
|
|
|
/**
|
|
|
* Creates a new ChunkOutputStream.
|
|
@@ -73,14 +83,18 @@ public class ChunkOutputStream extends OutputStream {
|
|
|
* @param xceiverClient client to perform container calls
|
|
|
* @param traceID container protocol call args
|
|
|
* @param chunkSize chunk size
|
|
|
+ * @param maxBufferSize -- Controls the maximum amount of memory that we need
|
|
|
+ * to allocate data buffering.
|
|
|
*/
|
|
|
public ChunkOutputStream(String containerKey, String key,
|
|
|
XceiverClientManager xceiverClientManager, XceiverClientSpi xceiverClient,
|
|
|
- String traceID, int chunkSize) {
|
|
|
+ String traceID, int chunkSize, int maxBufferSize) {
|
|
|
this.containerKey = containerKey;
|
|
|
this.key = key;
|
|
|
this.traceID = traceID;
|
|
|
this.chunkSize = chunkSize;
|
|
|
+ this.streamBufferSize = maxBufferSize;
|
|
|
+
|
|
|
KeyValue keyValue = KeyValue.newBuilder()
|
|
|
.setKey("TYPE").setValue("KEY").build();
|
|
|
this.containerKeyData = KeyData.newBuilder()
|
|
@@ -89,22 +103,24 @@ public class ChunkOutputStream extends OutputStream {
|
|
|
.addMetadata(keyValue);
|
|
|
this.xceiverClientManager = xceiverClientManager;
|
|
|
this.xceiverClient = xceiverClient;
|
|
|
- this.buffer = ByteBuffer.allocate(chunkSize);
|
|
|
+ this.buffer = ByteBuffer.allocate(maxBufferSize);
|
|
|
this.streamId = UUID.randomUUID().toString();
|
|
|
- this.chunkIndex = 0;
|
|
|
}
|
|
|
|
|
|
+ /**
|
|
|
+ * {@inheritDoc}
|
|
|
+ */
|
|
|
@Override
|
|
|
public synchronized void write(int b) throws IOException {
|
|
|
checkOpen();
|
|
|
- int rollbackPosition = buffer.position();
|
|
|
- int rollbackLimit = buffer.limit();
|
|
|
- buffer.put((byte)b);
|
|
|
- if (buffer.position() == chunkSize) {
|
|
|
- flushBufferToChunk(rollbackPosition, rollbackLimit);
|
|
|
- }
|
|
|
+ byte[] c = new byte[1];
|
|
|
+ c[0] = (byte) b;
|
|
|
+ write(c, 0, 1);
|
|
|
}
|
|
|
|
|
|
+ /**
|
|
|
+ * {@inheritDoc}
|
|
|
+ */
|
|
|
@Override
|
|
|
public void write(byte[] b, int off, int len) throws IOException {
|
|
|
if (b == null) {
|
|
@@ -118,17 +134,90 @@ public class ChunkOutputStream extends OutputStream {
|
|
|
return;
|
|
|
}
|
|
|
checkOpen();
|
|
|
- while (len > 0) {
|
|
|
- int writeLen = Math.min(chunkSize - buffer.position(), len);
|
|
|
- int rollbackPosition = buffer.position();
|
|
|
- int rollbackLimit = buffer.limit();
|
|
|
- buffer.put(b, off, writeLen);
|
|
|
- if (buffer.position() == chunkSize) {
|
|
|
- flushBufferToChunk(rollbackPosition, rollbackLimit);
|
|
|
+ int rollbackPosition = buffer.position();
|
|
|
+ int rollbackLimit = buffer.limit();
|
|
|
+ try {
|
|
|
+ List<ImmutablePair<CompletableFuture<ContainerProtos
|
|
|
+ .ContainerCommandResponseProto>, ChunkInfo>>
|
|
|
+ writeFutures = writeInParallel(b, off, len);
|
|
|
+ // This is a rendezvous point for this function call, all chunk I/O
|
|
|
+ // for this block must complete before we can declare this call as
|
|
|
+ // complete.
|
|
|
+
|
|
|
+ // Wait until all the futures complete or throws an exception if any of
|
|
|
+ // the calls ended with an exception this call will throw.
|
|
|
+ // if futures is null, it means that we wrote the data to the buffer and
|
|
|
+ // returned.
|
|
|
+ if (writeFutures != null) {
|
|
|
+ CompletableFuture.allOf(writeFutures.toArray(new
|
|
|
+ CompletableFuture[writeFutures.size()])).join();
|
|
|
+
|
|
|
+ // Wrote this data, we will clear this buffer now.
|
|
|
+ buffer.clear();
|
|
|
+ }
|
|
|
+ } catch (InterruptedException | ExecutionException e) {
|
|
|
+ buffer.position(rollbackPosition);
|
|
|
+ buffer.limit(rollbackLimit);
|
|
|
+ throw new IOException("Unexpected error in write. ", e);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Write a given block into many small chunks in parallel.
|
|
|
+ *
|
|
|
+ * @param b
|
|
|
+ * @param off
|
|
|
+ * @param len
|
|
|
+ * @throws IOException
|
|
|
+ * @throws ExecutionException
|
|
|
+ * @throws InterruptedException
|
|
|
+ */
|
|
|
+ public List<ImmutablePair<CompletableFuture<ContainerProtos
|
|
|
+ .ContainerCommandResponseProto>, ChunkInfo>>
|
|
|
+ writeInParallel(byte[] b, int off, int len)
|
|
|
+ throws IOException, ExecutionException, InterruptedException {
|
|
|
+
|
|
|
+ Preconditions.checkArgument(len <= streamBufferSize,
|
|
|
+ "A chunk write cannot be " + "larger than max buffer size limit.");
|
|
|
+ long newBlockCount = len / chunkSize;
|
|
|
+ buffer.put(b, off, len);
|
|
|
+ List<ImmutablePair<CompletableFuture<ContainerProtos
|
|
|
+ .ContainerCommandResponseProto>, ChunkInfo>>
|
|
|
+ writeFutures = new LinkedList<>();
|
|
|
+
|
|
|
+ // We if must have at least a chunkSize of data ready to write, if so we
|
|
|
+ // will go ahead and start writing that data.
|
|
|
+ if (buffer.position() >= chunkSize) {
|
|
|
+ // Allocate new byte slices which will point to each chunk of data
|
|
|
+ // that we want to write. Divide the byte buffer into individual chunks
|
|
|
+ // each of length equals to chunkSize max where each chunk will be
|
|
|
+ // assigned a chunkId where, for each chunk the async write requests will
|
|
|
+ // be made and wait for all of them to return before the write call
|
|
|
+ // returns.
|
|
|
+ for (int chunkId = 0; chunkId < newBlockCount; chunkId++) {
|
|
|
+ // Please note : We are not flipping the slice when we write since
|
|
|
+ // the slices are pointing the buffer start and end as needed for
|
|
|
+ // the chunk write. Also please note, Duplicate does not create a
|
|
|
+ // copy of data, it only creates metadata that points to the data
|
|
|
+ // stream.
|
|
|
+ ByteBuffer chunk = buffer.duplicate();
|
|
|
+ Preconditions.checkState((chunkId * chunkSize) < buffer.limit(),
|
|
|
+ "Chunk offset cannot be beyond the limits of the buffer.");
|
|
|
+ chunk.position(chunkId * chunkSize);
|
|
|
+ // Min handles the case where the last block might be lesser than
|
|
|
+ // chunk Size.
|
|
|
+ chunk.limit(chunk.position() +
|
|
|
+ Math.min(chunkSize, chunk.remaining() - (chunkId * chunkSize)));
|
|
|
+
|
|
|
+ // Schedule all the writes, this is a non-block call which returns
|
|
|
+ // futures. We collect these futures and wait for all of them to
|
|
|
+ // complete in the next line.
|
|
|
+ writeFutures.add(writeChunkToContainer(chunk, 0, chunkSize));
|
|
|
}
|
|
|
- off += writeLen;
|
|
|
- len -= writeLen;
|
|
|
+ return writeFutures;
|
|
|
}
|
|
|
+ // Nothing to do , return null.
|
|
|
+ return null;
|
|
|
}
|
|
|
|
|
|
@Override
|
|
@@ -137,7 +226,19 @@ public class ChunkOutputStream extends OutputStream {
|
|
|
if (buffer.position() > 0) {
|
|
|
int rollbackPosition = buffer.position();
|
|
|
int rollbackLimit = buffer.limit();
|
|
|
- flushBufferToChunk(rollbackPosition, rollbackLimit);
|
|
|
+ ByteBuffer chunk = buffer.duplicate();
|
|
|
+ try {
|
|
|
+
|
|
|
+ ImmutablePair<CompletableFuture<ContainerProtos
|
|
|
+ .ContainerCommandResponseProto>, ChunkInfo>
|
|
|
+ result = writeChunkToContainer(chunk, 0, chunkSize);
|
|
|
+ updateChunkInfo(result);
|
|
|
+ buffer.clear();
|
|
|
+ } catch (ExecutionException | InterruptedException e) {
|
|
|
+ buffer.position(rollbackPosition);
|
|
|
+ buffer.limit(rollbackLimit);
|
|
|
+ throw new IOException("Failure in flush", e);
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -147,10 +248,20 @@ public class ChunkOutputStream extends OutputStream {
|
|
|
buffer != null) {
|
|
|
try {
|
|
|
if (buffer.position() > 0) {
|
|
|
- writeChunkToContainer();
|
|
|
+ // This flip is needed since this is the real buffer to which we
|
|
|
+ // are writing and position will have moved each time we did a put.
|
|
|
+ buffer.flip();
|
|
|
+
|
|
|
+ // Call get immediately to make this call Synchronous.
|
|
|
+
|
|
|
+ ImmutablePair<CompletableFuture<ContainerProtos
|
|
|
+ .ContainerCommandResponseProto>, ChunkInfo>
|
|
|
+ result = writeChunkToContainer(buffer, 0, buffer.limit());
|
|
|
+ updateChunkInfo(result);
|
|
|
+ buffer.clear();
|
|
|
}
|
|
|
putKey(xceiverClient, containerKeyData.build(), traceID);
|
|
|
- } catch (IOException e) {
|
|
|
+ } catch (IOException | InterruptedException | ExecutionException e) {
|
|
|
throw new IOException(
|
|
|
"Unexpected Storage Container Exception: " + e.toString(), e);
|
|
|
} finally {
|
|
@@ -163,6 +274,24 @@ public class ChunkOutputStream extends OutputStream {
|
|
|
|
|
|
}
|
|
|
|
|
|
+ private void updateChunkInfo(
|
|
|
+ ImmutablePair<
|
|
|
+ CompletableFuture<ContainerProtos.ContainerCommandResponseProto>,
|
|
|
+ ChunkInfo
|
|
|
+ > result) throws InterruptedException, ExecutionException {
|
|
|
+ // Wait for this call to complete.
|
|
|
+ ContainerProtos.ContainerCommandResponseProto response =
|
|
|
+ result.getLeft().get();
|
|
|
+
|
|
|
+ // If the write call to the chunk is successful, we need to add that
|
|
|
+ // chunk information to the containerKeyData.
|
|
|
+ // TODO: Clean up the garbage in case of failure.
|
|
|
+ if(response.getResult() == SUCCESS) {
|
|
|
+ ChunkInfo chunk = result.getRight();
|
|
|
+ containerKeyData.addChunks(chunk);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
/**
|
|
|
* Checks if the stream is open. If not, throws an exception.
|
|
|
*
|
|
@@ -174,54 +303,36 @@ public class ChunkOutputStream extends OutputStream {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- /**
|
|
|
- * Attempts to flush buffered writes by writing a new chunk to the container.
|
|
|
- * If successful, then clears the buffer to prepare to receive writes for a
|
|
|
- * new chunk.
|
|
|
- *
|
|
|
- * @param rollbackPosition position to restore in buffer if write fails
|
|
|
- * @param rollbackLimit limit to restore in buffer if write fails
|
|
|
- * @throws IOException if there is an I/O error while performing the call
|
|
|
- */
|
|
|
- private synchronized void flushBufferToChunk(int rollbackPosition,
|
|
|
- int rollbackLimit) throws IOException {
|
|
|
- boolean success = false;
|
|
|
- try {
|
|
|
- writeChunkToContainer();
|
|
|
- success = true;
|
|
|
- } finally {
|
|
|
- if (success) {
|
|
|
- buffer.clear();
|
|
|
- } else {
|
|
|
- buffer.position(rollbackPosition);
|
|
|
- buffer.limit(rollbackLimit);
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
/**
|
|
|
* Writes buffered data as a new chunk to the container and saves chunk
|
|
|
* information to be used later in putKey call.
|
|
|
*
|
|
|
- * @throws IOException if there is an I/O error while performing the call
|
|
|
+ * @param data -- Data to write.
|
|
|
+ * @param offset - offset to the data buffer
|
|
|
+ * @param len - Length in bytes
|
|
|
+ * @return Returns a Immutable pair -- A future object that will contian
|
|
|
+ * the result of the operation, and the chunkInfo that we wrote.
|
|
|
+ *
|
|
|
+ * @throws IOException
|
|
|
+ * @throws ExecutionException
|
|
|
+ * @throws InterruptedException
|
|
|
*/
|
|
|
- private synchronized void writeChunkToContainer() throws IOException {
|
|
|
- buffer.flip();
|
|
|
- ByteString data = ByteString.copyFrom(buffer);
|
|
|
- ChunkInfo chunk = ChunkInfo
|
|
|
- .newBuilder()
|
|
|
- .setChunkName(
|
|
|
+ private ImmutablePair<
|
|
|
+ CompletableFuture<ContainerProtos.ContainerCommandResponseProto>,
|
|
|
+ ChunkInfo>
|
|
|
+ writeChunkToContainer(ByteBuffer data, int offset, int len)
|
|
|
+ throws IOException, ExecutionException, InterruptedException {
|
|
|
+
|
|
|
+
|
|
|
+ ByteString dataString = ByteString.copyFrom(data);
|
|
|
+ ChunkInfo chunk = ChunkInfo.newBuilder().setChunkName(
|
|
|
DigestUtils.md5Hex(key) + "_stream_"
|
|
|
- + streamId + "_chunk_" + ++chunkIndex)
|
|
|
+ + streamId + "_chunk_" + Time.monotonicNowNanos())
|
|
|
.setOffset(0)
|
|
|
- .setLen(data.size())
|
|
|
+ .setLen(len)
|
|
|
.build();
|
|
|
- try {
|
|
|
- writeChunk(xceiverClient, chunk, key, data, traceID);
|
|
|
- } catch (IOException e) {
|
|
|
- throw new IOException(
|
|
|
- "Unexpected Storage Container Exception: " + e.toString(), e);
|
|
|
- }
|
|
|
- containerKeyData.addChunks(chunk);
|
|
|
+ CompletableFuture<ContainerProtos.ContainerCommandResponseProto> response =
|
|
|
+ writeChunk(xceiverClient, chunk, key, dataString, traceID);
|
|
|
+ return new ImmutablePair(response, chunk);
|
|
|
}
|
|
|
}
|