|
@@ -218,6 +218,13 @@ class DataStreamer extends Daemon {
|
|
|
private boolean failPacket = false;
|
|
|
private final long dfsclientSlowLogThresholdMs;
|
|
|
private long artificialSlowdown = 0;
|
|
|
+ // DataNodes currently reported as congested via the ECN flags in pipeline
|
|
|
+ // acks. The stream backs off while this list is non-empty.
|
|
|
+ private final ArrayList<DatanodeInfo> congestedNodes = new ArrayList<>();
|
|
|
+ private static final int CONGESTION_BACKOFF_MEAN_TIME_IN_MS = 5000;
|
|
|
+ private static final int CONGESTION_BACK_OFF_MAX_TIME_IN_MS =
|
|
|
+ CONGESTION_BACKOFF_MEAN_TIME_IN_MS * 10;
|
|
|
+ private int lastCongestionBackoffTime;
|
|
|
|
|
|
private final LoadingCache<DatanodeInfo, DatanodeInfo> excludedNodes;
|
|
|
|
|
@@ -386,6 +393,11 @@ class DataStreamer extends Daemon {
|
|
|
one = createHeartbeatPacket();
|
|
|
assert one != null;
|
|
|
} else {
|
|
|
+ try {
|
|
|
+ backOffIfNecessary();
|
|
|
+ } catch (InterruptedException e) {
|
|
|
+ DFSClient.LOG.warn("Caught exception ", e);
|
|
|
+ }
|
|
|
one = dataQueue.getFirst(); // regular data packet
|
|
|
long parents[] = one.getTraceParents();
|
|
|
if (parents.length > 0) {
|
|
@@ -815,9 +827,14 @@ class DataStreamer extends Daemon {
|
|
|
|
|
|
long seqno = ack.getSeqno();
|
|
|
// processes response status from datanodes.
|
|
|
+ ArrayList<DatanodeInfo> congestedNodesFromAck = new ArrayList<>();
|
|
|
for (int i = ack.getNumOfReplies()-1; i >=0 && dfsClient.clientRunning; i--) {
|
|
|
final Status reply = PipelineAck.getStatusFromHeader(ack
|
|
|
.getHeaderFlag(i));
|
|
|
+ if (PipelineAck.getECNFromHeader(ack.getHeaderFlag(i)) ==
|
|
|
+ PipelineAck.ECN.CONGESTED) {
|
|
|
+ congestedNodesFromAck.add(targets[i]);
|
|
|
+ }
|
|
|
// Restart will not be treated differently unless it is
|
|
|
// the local node or the only one in the pipeline.
|
|
|
if (PipelineAck.isRestartOOBStatus(reply) &&
|
|
@@ -839,6 +856,18 @@ class DataStreamer extends Daemon {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ if (!congestedNodesFromAck.isEmpty()) {
|
|
|
+ synchronized (congestedNodes) {
|
|
|
+ congestedNodes.clear();
|
|
|
+ congestedNodes.addAll(congestedNodesFromAck);
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ synchronized (congestedNodes) {
|
|
|
+ congestedNodes.clear();
|
|
|
+ lastCongestionBackoffTime = 0;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
assert seqno != PipelineAck.UNKOWN_SEQNO :
|
|
|
"Ack for unknown seqno should be a failed ack: " + ack;
|
|
|
if (seqno == DFSPacket.HEART_BEAT_SEQNO) { // a heartbeat ack
|
|
@@ -1543,6 +1572,40 @@ class DataStreamer extends Daemon {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ /**
|
|
|
+ * This function sleeps for a certain amount of time when the writing
|
|
|
+ * pipeline is congested. The sleep time is chosen with the "decorrelated
|
|
|
+ * jitter" backoff strategy, capped at CONGESTION_BACK_OFF_MAX_TIME_IN_MS.
|
|
|
+ * @throws InterruptedException if interrupted while backing off.
|
|
|
+ * @see
|
|
|
+ * <a href="http://www.awsarchitectureblog.com/2015/03/backoff.html">
|
|
|
+ * http://www.awsarchitectureblog.com/2015/03/backoff.html</a>
|
|
|
+ */
|
|
|
+ private void backOffIfNecessary() throws InterruptedException {
|
|
|
+ int t = 0; // backoff in ms; stays 0 when there is no congestion
|
|
|
+ synchronized (congestedNodes) {
|
|
|
+ if (!congestedNodes.isEmpty()) {
|
|
|
+ StringBuilder sb = new StringBuilder("DataNode");
|
|
|
+ for (DatanodeInfo i : congestedNodes) {
|
|
|
+ sb.append(' ').append(i);
|
|
|
+ }
|
|
|
+ int range = Math.abs(lastCongestionBackoffTime * 3 -
|
|
|
+ CONGESTION_BACKOFF_MEAN_TIME_IN_MS); // jitter window, decorrelated from last backoff
|
|
|
+ int base = Math.min(lastCongestionBackoffTime * 3,
|
|
|
+ CONGESTION_BACKOFF_MEAN_TIME_IN_MS); // floor for the next backoff
|
|
|
+ t = Math.min(CONGESTION_BACK_OFF_MAX_TIME_IN_MS,
|
|
|
+ (int)(base + Math.random() * range)); // sleep = min(cap, base + rand * range)
|
|
|
+ lastCongestionBackoffTime = t;
|
|
|
+ sb.append(" are congested. Backing off for ").append(t).append(" ms");
|
|
|
+ DFSClient.LOG.info(sb.toString());
|
|
|
+ congestedNodes.clear(); // ack processing repopulates this if still congested
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (t != 0) { // sleep outside the lock so ack processing is not blocked
|
|
|
+ Thread.sleep(t);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
/**
|
|
|
* get the block this streamer is writing to
|
|
|
*
|