|
@@ -57,6 +57,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
|
|
import org.apache.hadoop.hdfs.server.protocol.DisallowedDatanodeException;
|
|
import org.apache.hadoop.hdfs.server.protocol.DisallowedDatanodeException;
|
|
import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
|
|
import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
|
|
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
|
|
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
|
|
|
|
+import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports;
|
|
import org.apache.hadoop.hdfs.server.protocol.SlowPeerReports;
|
|
import org.apache.hadoop.hdfs.server.protocol.SlowPeerReports;
|
|
import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
|
|
import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
|
|
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
|
|
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
|
|
@@ -497,11 +498,15 @@ class BPServiceActor implements Runnable {
|
|
.getVolumeFailureSummary();
|
|
.getVolumeFailureSummary();
|
|
int numFailedVolumes = volumeFailureSummary != null ?
|
|
int numFailedVolumes = volumeFailureSummary != null ?
|
|
volumeFailureSummary.getFailedStorageLocations().length : 0;
|
|
volumeFailureSummary.getFailedStorageLocations().length : 0;
|
|
- final boolean slowPeersReportDue = scheduler.isSlowPeersReportDue(now);
|
|
|
|
|
|
+ final boolean outliersReportDue = scheduler.isOutliersReportDue(now);
|
|
final SlowPeerReports slowPeers =
|
|
final SlowPeerReports slowPeers =
|
|
- slowPeersReportDue && dn.getPeerMetrics() != null ?
|
|
|
|
|
|
+ outliersReportDue && dn.getPeerMetrics() != null ?
|
|
SlowPeerReports.create(dn.getPeerMetrics().getOutliers()) :
|
|
SlowPeerReports.create(dn.getPeerMetrics().getOutliers()) :
|
|
SlowPeerReports.EMPTY_REPORT;
|
|
SlowPeerReports.EMPTY_REPORT;
|
|
|
|
+ final SlowDiskReports slowDisks =
|
|
|
|
+ outliersReportDue && dn.getDiskMetrics() != null ?
|
|
|
|
+ SlowDiskReports.create(dn.getDiskMetrics().getDiskOutliersStats()) :
|
|
|
|
+ SlowDiskReports.EMPTY_REPORT;
|
|
HeartbeatResponse response = bpNamenode.sendHeartbeat(bpRegistration,
|
|
HeartbeatResponse response = bpNamenode.sendHeartbeat(bpRegistration,
|
|
reports,
|
|
reports,
|
|
dn.getFSDataset().getCacheCapacity(),
|
|
dn.getFSDataset().getCacheCapacity(),
|
|
@@ -511,11 +516,12 @@ class BPServiceActor implements Runnable {
|
|
numFailedVolumes,
|
|
numFailedVolumes,
|
|
volumeFailureSummary,
|
|
volumeFailureSummary,
|
|
requestBlockReportLease,
|
|
requestBlockReportLease,
|
|
- slowPeers);
|
|
|
|
|
|
+ slowPeers,
|
|
|
|
+ slowDisks);
|
|
|
|
|
|
- if (slowPeersReportDue) {
|
|
|
|
|
|
+ if (outliersReportDue) {
|
|
// If the report was due and successfully sent, schedule the next one.
|
|
// If the report was due and successfully sent, schedule the next one.
|
|
- scheduler.scheduleNextSlowPeerReport();
|
|
|
|
|
|
+ scheduler.scheduleNextOutlierReport();
|
|
}
|
|
}
|
|
return response;
|
|
return response;
|
|
}
|
|
}
|
|
@@ -1095,7 +1101,7 @@ class BPServiceActor implements Runnable {
|
|
boolean resetBlockReportTime = true;
|
|
boolean resetBlockReportTime = true;
|
|
|
|
|
|
@VisibleForTesting
|
|
@VisibleForTesting
|
|
- volatile long nextSlowPeersReportTime = monotonicNow();
|
|
|
|
|
|
+ volatile long nextOutliersReportTime = monotonicNow();
|
|
|
|
|
|
private final AtomicBoolean forceFullBlockReport =
|
|
private final AtomicBoolean forceFullBlockReport =
|
|
new AtomicBoolean(false);
|
|
new AtomicBoolean(false);
|
|
@@ -1103,14 +1109,14 @@ class BPServiceActor implements Runnable {
|
|
private final long heartbeatIntervalMs;
|
|
private final long heartbeatIntervalMs;
|
|
private final long lifelineIntervalMs;
|
|
private final long lifelineIntervalMs;
|
|
private final long blockReportIntervalMs;
|
|
private final long blockReportIntervalMs;
|
|
- private final long slowPeersReportIntervalMs;
|
|
|
|
|
|
+ private final long outliersReportIntervalMs;
|
|
|
|
|
|
Scheduler(long heartbeatIntervalMs, long lifelineIntervalMs,
|
|
Scheduler(long heartbeatIntervalMs, long lifelineIntervalMs,
|
|
- long blockReportIntervalMs, long slowPeersReportIntervalMs) {
|
|
|
|
|
|
+ long blockReportIntervalMs, long outliersReportIntervalMs) {
|
|
this.heartbeatIntervalMs = heartbeatIntervalMs;
|
|
this.heartbeatIntervalMs = heartbeatIntervalMs;
|
|
this.lifelineIntervalMs = lifelineIntervalMs;
|
|
this.lifelineIntervalMs = lifelineIntervalMs;
|
|
this.blockReportIntervalMs = blockReportIntervalMs;
|
|
this.blockReportIntervalMs = blockReportIntervalMs;
|
|
- this.slowPeersReportIntervalMs = slowPeersReportIntervalMs;
|
|
|
|
|
|
+ this.outliersReportIntervalMs = outliersReportIntervalMs;
|
|
scheduleNextLifeline(nextHeartbeatTime);
|
|
scheduleNextLifeline(nextHeartbeatTime);
|
|
}
|
|
}
|
|
|
|
|
|
@@ -1143,8 +1149,8 @@ class BPServiceActor implements Runnable {
|
|
lastBlockReportTime = blockReportTime;
|
|
lastBlockReportTime = blockReportTime;
|
|
}
|
|
}
|
|
|
|
|
|
- void scheduleNextSlowPeerReport() {
|
|
|
|
- nextSlowPeersReportTime = monotonicNow() + slowPeersReportIntervalMs;
|
|
|
|
|
|
+ void scheduleNextOutlierReport() {
|
|
|
|
+ nextOutliersReportTime = monotonicNow() + outliersReportIntervalMs;
|
|
}
|
|
}
|
|
|
|
|
|
long getLastHearbeatTime() {
|
|
long getLastHearbeatTime() {
|
|
@@ -1173,8 +1179,8 @@ class BPServiceActor implements Runnable {
|
|
return nextBlockReportTime - curTime <= 0;
|
|
return nextBlockReportTime - curTime <= 0;
|
|
}
|
|
}
|
|
|
|
|
|
- boolean isSlowPeersReportDue(long curTime) {
|
|
|
|
- return nextSlowPeersReportTime - curTime <= 0;
|
|
|
|
|
|
+ boolean isOutliersReportDue(long curTime) {
|
|
|
|
+ return nextOutliersReportTime - curTime <= 0;
|
|
}
|
|
}
|
|
|
|
|
|
void forceFullBlockReportNow() {
|
|
void forceFullBlockReportNow() {
|