瀏覽代碼

HDFS-9145. Tracking methods that hold FSNamesytemLock for too long. Contributed by Mingliang Liu.

(cherry picked from commit 5c5362980c429a5ad0b58e0dd1933de9cde7f369)
Haohui Mai 9 年之前
父節點
當前提交
0d8a1636ef

+ 4 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java

@@ -195,6 +195,10 @@ public abstract class GenericTestUtils {
       logger.removeAppender(appender);
 
     }
+
+    public void clearOutput() {
+      sw.getBuffer().setLength(0);
+    }
   }
   
   

+ 21 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -1472,6 +1472,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     return Util.stringCollectionAsURIs(dirNames);
   }
 
+  /** Threshold (ms) for long holding write lock report. */
+  static final short WRITELOCK_REPORTING_THRESHOLD = 1000;
+  /** Last time stamp for write lock. Keep the longest one for multi-entrance.*/
+  private long writeLockHeldTimeStamp;
+
   @Override
   public void readLock() {
     this.fsLock.readLock().lock();
@@ -1483,14 +1488,30 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
   @Override
   public void writeLock() {
     this.fsLock.writeLock().lock();
+    if (fsLock.getWriteHoldCount() == 1) {
+      writeLockHeldTimeStamp = monotonicNow();
+    }
   }
   @Override
   public void writeLockInterruptibly() throws InterruptedException {
     this.fsLock.writeLock().lockInterruptibly();
+    if (fsLock.getWriteHoldCount() == 1) {
+      writeLockHeldTimeStamp = monotonicNow();
+    }
   }
   @Override
   public void writeUnlock() {
+    final boolean needReport = fsLock.getWriteHoldCount() == 1 &&
+        fsLock.isWriteLockedByCurrentThread();
     this.fsLock.writeLock().unlock();
+
+    if (needReport) {
+      long writeLockInterval = monotonicNow() - writeLockHeldTimeStamp;
+      if (writeLockInterval >= WRITELOCK_REPORTING_THRESHOLD) {
+        LOG.info("FSNamesystem write lock held for " + writeLockInterval +
+            " ms via\n" + StringUtils.getStackTrace(Thread.currentThread()));
+      }
+    }
   }
   @Override
   public boolean hasWriteLock() {

+ 56 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java

@@ -36,6 +36,9 @@ import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
 import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
 import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.test.GenericTestUtils.LogCapturer;
+import org.apache.log4j.Level;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Test;
@@ -241,4 +244,57 @@ public class TestFSNamesystem {
     Assert.assertEquals("Expected number of blocked thread not found",
                         threadCount, rwLock.getQueueLength());
   }
+
+  /**
+   * Test when FSNamesystem lock is held for a long time, logger will report it.
+   */
+  @Test(timeout=45000)
+  public void testFSLockLongHoldingReport() throws Exception {
+    Configuration conf = new Configuration();
+    FSImage fsImage = Mockito.mock(FSImage.class);
+    FSEditLog fsEditLog = Mockito.mock(FSEditLog.class);
+    Mockito.when(fsImage.getEditLog()).thenReturn(fsEditLog);
+    FSNamesystem fsn = new FSNamesystem(conf, fsImage);
+
+    LogCapturer logs = LogCapturer.captureLogs(FSNamesystem.LOG);
+    GenericTestUtils.setLogLevel(FSNamesystem.LOG, Level.INFO);
+
+    // Don't report if the write lock is held for a short time
+    fsn.writeLock();
+    Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2);
+    fsn.writeUnlock();
+    assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName()));
+
+
+    // Report if the write lock is held for a long time
+    fsn.writeLock();
+    Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD + 100);
+    logs.clearOutput();
+    fsn.writeUnlock();
+    assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName()));
+
+    // Report if the write lock is held (interruptibly) for a long time
+    fsn.writeLockInterruptibly();
+    Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD + 100);
+    logs.clearOutput();
+    fsn.writeUnlock();
+    assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName()));
+
+    // Report if it's held for a long time when re-entering write lock
+    fsn.writeLock();
+    Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2 + 1);
+    fsn.writeLockInterruptibly();
+    Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2 + 1);
+    fsn.writeLock();
+    Thread.sleep(FSNamesystem.WRITELOCK_REPORTING_THRESHOLD / 2);
+    logs.clearOutput();
+    fsn.writeUnlock();
+    assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName()));
+    logs.clearOutput();
+    fsn.writeUnlock();
+    assertFalse(logs.getOutput().contains(GenericTestUtils.getMethodName()));
+    logs.clearOutput();
+    fsn.writeUnlock();
+    assertTrue(logs.getOutput().contains(GenericTestUtils.getMethodName()));
+  }
 }