|
@@ -21,10 +21,12 @@ package org.apache.hadoop.hdfs.server.namenode;
|
|
|
import java.util.concurrent.TimeUnit;
|
|
|
import java.util.concurrent.atomic.AtomicInteger;
|
|
|
import java.util.concurrent.atomic.AtomicLong;
|
|
|
+import java.util.concurrent.atomic.AtomicReference;
|
|
|
import java.util.concurrent.locks.Condition;
|
|
|
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
|
|
|
|
|
import com.google.common.annotations.VisibleForTesting;
|
|
|
+import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
|
|
|
import org.apache.hadoop.conf.Configuration;
|
|
|
import org.apache.hadoop.log.LogThrottlingHelper;
|
|
|
import org.apache.hadoop.metrics2.lib.MutableRatesWithAggregation;
|
|
@@ -97,8 +99,18 @@ class FSNamesystemLock {
|
|
|
new AtomicInteger(0);
|
|
|
/** Time stamp (ms) of the last time a read lock report was written. */
|
|
|
private final AtomicLong timeStampOfLastReadLockReportMs = new AtomicLong(0);
|
|
|
- /** Longest time (ms) a read lock was held since the last report. */
|
|
|
- private final AtomicLong longestReadLockHeldIntervalMs = new AtomicLong(0);
|
|
|
+ /**
|
|
|
+ * The info (lock held time and stack trace) when longest time (ms) a read
|
|
|
+ * lock was held since the last report.
|
|
|
+ */
|
|
|
+ private final AtomicReference<ReadLockHeldInfo> longestReadLockHeldInfo =
|
|
|
+ new AtomicReference<>(new ReadLockHeldInfo(0, null));
|
|
|
+
|
|
|
+ /**
|
|
|
+ * The stack trace when longest time of the write lock
|
|
|
+ * was held since the last report.
|
|
|
+ */
|
|
|
+ private volatile String longestWriteLockHeldStackTrace;
|
|
|
|
|
|
@VisibleForTesting
|
|
|
static final String OP_NAME_OTHER = "OTHER";
|
|
@@ -172,12 +184,13 @@ class FSNamesystemLock {
|
|
|
final long readLockIntervalMs =
|
|
|
TimeUnit.NANOSECONDS.toMillis(readLockIntervalNanos);
|
|
|
if (needReport && readLockIntervalMs >= this.readLockReportingThresholdMs) {
|
|
|
- long localLongestReadLock;
|
|
|
+ ReadLockHeldInfo localLockHeldInfo;
|
|
|
do {
|
|
|
- localLongestReadLock = longestReadLockHeldIntervalMs.get();
|
|
|
- } while (localLongestReadLock - readLockIntervalMs < 0 &&
|
|
|
- !longestReadLockHeldIntervalMs.compareAndSet(localLongestReadLock,
|
|
|
- readLockIntervalMs));
|
|
|
+ localLockHeldInfo = longestReadLockHeldInfo.get();
|
|
|
+ } while (localLockHeldInfo.getIntervalMs() - readLockIntervalMs < 0 &&
|
|
|
+ !longestReadLockHeldInfo.compareAndSet(localLockHeldInfo,
|
|
|
+ new ReadLockHeldInfo(readLockIntervalMs,
|
|
|
+ StringUtils.getStackTrace(Thread.currentThread()))));
|
|
|
|
|
|
long localTimeStampOfLastReadLockReport;
|
|
|
long nowMs;
|
|
@@ -193,13 +206,13 @@ class FSNamesystemLock {
|
|
|
} while (!timeStampOfLastReadLockReportMs.compareAndSet(
|
|
|
localTimeStampOfLastReadLockReport, nowMs));
|
|
|
int numSuppressedWarnings = numReadLockWarningsSuppressed.getAndSet(0);
|
|
|
- long longestLockHeldIntervalMs =
|
|
|
- longestReadLockHeldIntervalMs.getAndSet(0);
|
|
|
- FSNamesystem.LOG.info("FSNamesystem read lock held for " +
|
|
|
- readLockIntervalMs + " ms via\n" +
|
|
|
- StringUtils.getStackTrace(Thread.currentThread()) +
|
|
|
- "\tNumber of suppressed read-lock reports: " + numSuppressedWarnings +
|
|
|
- "\n\tLongest read-lock held interval: " + longestLockHeldIntervalMs);
|
|
|
+ ReadLockHeldInfo lockHeldInfo = longestReadLockHeldInfo
|
|
|
+ .getAndSet(new ReadLockHeldInfo(0, null));
|
|
|
+ FSNamesystem.LOG.info(
|
|
|
+ "\tNumber of suppressed read-lock reports: {}" +
|
|
|
+ "\n\tLongest read-lock held interval: {}ms via {}",
|
|
|
+ numSuppressedWarnings, lockHeldInfo.getIntervalMs(),
|
|
|
+ lockHeldInfo.getStackTrace());
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -255,6 +268,14 @@ class FSNamesystemLock {
|
|
|
LogAction logAction = LogThrottlingHelper.DO_NOT_LOG;
|
|
|
if (needReport &&
|
|
|
writeLockIntervalMs >= this.writeLockReportingThresholdMs) {
|
|
|
+ SummaryStatistics currentStats =
|
|
|
+ writeLockReportLogger.getCurrentStats("write", 0);
|
|
|
+ double currentMaxTime = currentStats != null ? currentStats.getMax() : 0;
|
|
|
+ if (currentMaxTime < writeLockIntervalMs) {
|
|
|
+ longestWriteLockHeldStackTrace =
|
|
|
+ StringUtils.getStackTrace(Thread.currentThread());
|
|
|
+ }
|
|
|
+
|
|
|
logAction = writeLockReportLogger
|
|
|
.record("write", currentTimeMs, writeLockIntervalMs);
|
|
|
}
|
|
@@ -266,12 +287,12 @@ class FSNamesystemLock {
|
|
|
}
|
|
|
|
|
|
if (logAction.shouldLog()) {
|
|
|
- FSNamesystem.LOG.info("FSNamesystem write lock held for {} ms via {}\t" +
|
|
|
- "Number of suppressed write-lock reports: {}\n\tLongest write-lock " +
|
|
|
- "held interval: {} \n\tTotal suppressed write-lock held time: {}",
|
|
|
- writeLockIntervalMs,
|
|
|
- StringUtils.getStackTrace(Thread.currentThread()),
|
|
|
+ FSNamesystem.LOG.info(
|
|
|
+ "\tNumber of suppressed write-lock reports: {}" +
|
|
|
+ "\n\tLongest write-lock held interval: {}ms via {} " +
|
|
|
+ "\n\tTotal suppressed write-lock held time: {}",
|
|
|
logAction.getCount() - 1, logAction.getStats(0).getMax(),
|
|
|
+ longestWriteLockHeldStackTrace,
|
|
|
logAction.getStats(0).getSum() - writeLockIntervalMs);
|
|
|
}
|
|
|
}
|
|
@@ -323,4 +344,38 @@ class FSNamesystemLock {
|
|
|
org.apache.commons.lang3.StringUtils.capitalize(operationName) +
|
|
|
LOCK_METRIC_SUFFIX;
|
|
|
}
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Read lock Held Info.
|
|
|
+ */
|
|
|
+ private static class ReadLockHeldInfo {
|
|
|
+ /** Read lock held time. */
|
|
|
+ private Long intervalMs;
|
|
|
+ /** The stack trace read lock was held. */
|
|
|
+ private String stackTrace;
|
|
|
+
|
|
|
+ ReadLockHeldInfo(long intervalMs, String stackTrace) {
|
|
|
+ this.intervalMs = intervalMs;
|
|
|
+ this.stackTrace = stackTrace;
|
|
|
+ }
|
|
|
+
|
|
|
+ public Long getIntervalMs() {
|
|
|
+ return this.intervalMs;
|
|
|
+ }
|
|
|
+
|
|
|
+ public String getStackTrace() {
|
|
|
+ return this.stackTrace;
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public int hashCode() {
|
|
|
+ return this.intervalMs.hashCode();
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public boolean equals(Object obj) {
|
|
|
+ return obj instanceof ReadLockHeldInfo && ((ReadLockHeldInfo) obj)
|
|
|
+ .getIntervalMs().compareTo(intervalMs) == 0;
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|