|
@@ -38,6 +38,7 @@ import java.util.HashSet;
|
|
|
import java.util.LinkedList;
|
|
|
import java.util.List;
|
|
|
import java.util.Map;
|
|
|
+import java.util.Set;
|
|
|
import java.util.concurrent.TimeUnit;
|
|
|
import java.util.concurrent.atomic.AtomicBoolean;
|
|
|
import java.util.concurrent.atomic.AtomicReference;
|
|
@@ -525,11 +526,13 @@ class DataStreamer extends Daemon {
|
|
|
// List of congested data nodes. The stream will back off if the DataNodes
|
|
|
// are congested
|
|
|
private final List<DatanodeInfo> congestedNodes = new ArrayList<>();
|
|
|
+ private final Map<DatanodeInfo, Integer> slowNodeMap = new HashMap<>();
|
|
|
private static final int CONGESTION_BACKOFF_MEAN_TIME_IN_MS = 5000;
|
|
|
private static final int CONGESTION_BACK_OFF_MAX_TIME_IN_MS =
|
|
|
CONGESTION_BACKOFF_MEAN_TIME_IN_MS * 10;
|
|
|
private int lastCongestionBackoffTime;
|
|
|
private int maxPipelineRecoveryRetries;
|
|
|
+ private int markSlowNodeAsBadNodeThreshold;
|
|
|
|
|
|
protected final LoadingCache<DatanodeInfo, DatanodeInfo> excludedNodes;
|
|
|
private final String[] favoredNodes;
|
|
@@ -559,6 +562,7 @@ class DataStreamer extends Daemon {
|
|
|
this.errorState = new ErrorState(conf.getDatanodeRestartTimeout());
|
|
|
this.addBlockFlags = flags;
|
|
|
this.maxPipelineRecoveryRetries = conf.getMaxPipelineRecoveryRetries();
|
|
|
+ this.markSlowNodeAsBadNodeThreshold = conf.getMarkSlowNodeAsBadNodeThreshold();
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -1155,6 +1159,7 @@ class DataStreamer extends Daemon {
|
|
|
long seqno = ack.getSeqno();
|
|
|
// processes response status from datanodes.
|
|
|
ArrayList<DatanodeInfo> congestedNodesFromAck = new ArrayList<>();
|
|
|
+ ArrayList<DatanodeInfo> slownodesFromAck = new ArrayList<>();
|
|
|
for (int i = ack.getNumOfReplies()-1; i >=0 && dfsClient.clientRunning; i--) {
|
|
|
final Status reply = PipelineAck.getStatusFromHeader(ack
|
|
|
.getHeaderFlag(i));
|
|
@@ -1162,6 +1167,10 @@ class DataStreamer extends Daemon {
|
|
|
PipelineAck.ECN.CONGESTED) {
|
|
|
congestedNodesFromAck.add(targets[i]);
|
|
|
}
|
|
|
+ if (PipelineAck.getSLOWFromHeader(ack.getHeaderFlag(i)) ==
|
|
|
+ PipelineAck.SLOW.SLOW) {
|
|
|
+ slownodesFromAck.add(targets[i]);
|
|
|
+ }
|
|
|
// Restart will not be treated differently unless it is
|
|
|
// the local node or the only one in the pipeline.
|
|
|
if (PipelineAck.isRestartOOBStatus(reply)) {
|
|
@@ -1191,6 +1200,16 @@ class DataStreamer extends Daemon {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ if (slownodesFromAck.isEmpty()) {
|
|
|
+ if (!slowNodeMap.isEmpty()) {
|
|
|
+ slowNodeMap.clear();
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ markSlowNode(slownodesFromAck);
|
|
|
+ LOG.debug("SlowNodeMap content: {}.", slowNodeMap);
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
assert seqno != PipelineAck.UNKOWN_SEQNO :
|
|
|
"Ack for unknown seqno should be a failed ack: " + ack;
|
|
|
if (seqno == DFSPacket.HEART_BEAT_SEQNO) { // a heartbeat ack
|
|
@@ -1257,10 +1276,51 @@ class DataStreamer extends Daemon {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ void markSlowNode(List<DatanodeInfo> slownodesFromAck) throws IOException {
|
|
|
+ Set<DatanodeInfo> discontinuousNodes = new HashSet<>(slowNodeMap.keySet());
|
|
|
+ for (DatanodeInfo slowNode : slownodesFromAck) {
|
|
|
+ if (!slowNodeMap.containsKey(slowNode)) {
|
|
|
+ slowNodeMap.put(slowNode, 1);
|
|
|
+ } else {
|
|
|
+ int oldCount = slowNodeMap.get(slowNode);
|
|
|
+ slowNodeMap.put(slowNode, ++oldCount);
|
|
|
+ }
|
|
|
+ discontinuousNodes.remove(slowNode);
|
|
|
+ }
|
|
|
+ for (DatanodeInfo discontinuousNode : discontinuousNodes) {
|
|
|
+ slowNodeMap.remove(discontinuousNode);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (!slowNodeMap.isEmpty()) {
|
|
|
+ for (Map.Entry<DatanodeInfo, Integer> entry : slowNodeMap.entrySet()) {
|
|
|
+ if (entry.getValue() >= markSlowNodeAsBadNodeThreshold) {
|
|
|
+ DatanodeInfo slowNode = entry.getKey();
|
|
|
+ int index = getDatanodeIndex(slowNode);
|
|
|
+ if (index >= 0) {
|
|
|
+ errorState.setBadNodeIndex(index);
|
|
|
+ throw new IOException("Receive reply from slowNode " + slowNode +
|
|
|
+ " for continuous " + markSlowNodeAsBadNodeThreshold +
|
|
|
+ " times, treating it as badNode");
|
|
|
+ }
|
|
|
+ slowNodeMap.remove(entry.getKey());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
void close() {
|
|
|
responderClosed = true;
|
|
|
this.interrupt();
|
|
|
}
|
|
|
+
|
|
|
+ int getDatanodeIndex(DatanodeInfo datanodeInfo) {
|
|
|
+ for (int i = 0; i < targets.length; i++) {
|
|
|
+ if (targets[i].equals(datanodeInfo)) {
|
|
|
+ return i;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return -1;
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
private boolean shouldHandleExternalError(){
|