Ver código fonte

HDFS-15808. Add metrics for FSNamesystem read/write lock hold long time. (#2668) Contributed by tomscut.

(cherry picked from commit 9cb51bf106802c78b1400fba9f1d1c7e772dd5e7)
tomscut 4 anos atrás
pai
commit
03ac2e41c0

+ 14 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -4440,6 +4440,20 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     return fsLock.getQueueLength();
   }
 
+  /**
+   * Publishes the long-hold count of the read lock as the
+   * {@code ReadLockLongHoldCount} counter metric.
+   *
+   * @return the value reported by {@code fsLock.getNumOfReadLockLongHold()}
+   */
+  @Metric(value = {"ReadLockLongHoldCount", "The number of times " +
+          "the read lock has been held for longer than the threshold"},
+          type = Metric.Type.COUNTER)
+  public long getNumOfReadLockLongHold() {
+    return fsLock.getNumOfReadLockLongHold();
+  }
+
+  /**
+   * Publishes the long-hold count of the write lock as the
+   * {@code WriteLockLongHoldCount} counter metric.
+   *
+   * @return the value reported by {@code fsLock.getNumOfWriteLockLongHold()}
+   */
+  @Metric(value = {"WriteLockLongHoldCount", "The number of times " +
+          "the write lock has been held for longer than the threshold"},
+          type = Metric.Type.COUNTER)
+  public long getNumOfWriteLockLongHold() {
+    return fsLock.getNumOfWriteLockLongHold();
+  }
+
   int getNumberOfDatanodes(DatanodeReportType type) {
     readLock();
     try {

+ 34 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystemLock.java

@@ -101,6 +101,16 @@ class FSNamesystemLock {
   private final AtomicLong timeStampOfLastReadLockReportMs = new AtomicLong(0);
   /** Longest time (ms) a read lock was held since the last report. */
   private final AtomicLong longestReadLockHeldIntervalMs = new AtomicLong(0);
+  /**
+   * The number of times the read lock has been held
+   * for at least the read lock reporting threshold.
+   */
+  private final AtomicLong numReadLockLongHold = new AtomicLong(0);
+  /**
+   * The number of times the write lock has been held
+   * for at least the write lock reporting threshold.
+   */
+  private final AtomicLong numWriteLockLongHold = new AtomicLong(0);
 
   @VisibleForTesting
   static final String OP_NAME_OTHER = "OTHER";
@@ -168,6 +178,7 @@ class FSNamesystemLock {
     final long readLockIntervalMs =
         TimeUnit.NANOSECONDS.toMillis(readLockIntervalNanos);
     if (needReport && readLockIntervalMs >= this.readLockReportingThresholdMs) {
+      numReadLockLongHold.incrementAndGet();
       long localLongestReadLock;
       do {
         localLongestReadLock = longestReadLockHeldIntervalMs.get();
@@ -245,6 +256,7 @@ class FSNamesystemLock {
     LogAction logAction = LogThrottlingHelper.DO_NOT_LOG;
     if (needReport &&
         writeLockIntervalMs >= this.writeLockReportingThresholdMs) {
+      numWriteLockLongHold.incrementAndGet();
       logAction = writeLockReportLogger
           .record("write", currentTimeMs, writeLockIntervalMs);
     }
@@ -282,6 +294,28 @@ class FSNamesystemLock {
     return coarseLock.writeLock().newCondition();
   }
 
+  /**
+   * Returns the number of times the read lock
+   * has been held for at least the reporting threshold.
+   *
+   * @return the current value of the {@code numReadLockLongHold}
+   * counter
+   */
+  public long getNumOfReadLockLongHold() {
+    return numReadLockLongHold.get();
+  }
+
+  /**
+   * Returns the number of times the write lock
+   * has been held for at least the reporting threshold.
+   *
+   * @return the current value of the {@code numWriteLockLongHold}
+   * counter
+   */
+  public long getNumOfWriteLockLongHold() {
+    return numWriteLockLongHold.get();
+  }
+
   /**
    * Returns the QueueLength of waiting threads.
    *