Forráskód Böngészése

HADOOP-8755. Print thread dump when tests fail due to timeout. Contributed by Andrey Klochkov.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1384627 13f79535-47bb-0310-9956-ffa450edef68
Aaron Myers 12 éve
szülő
commit
d8e386f01a

+ 3 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -226,6 +226,9 @@ Release 2.0.3-alpha - Unreleased
     HADOOP-8789. Tests setLevel(Level.OFF) should be Level.ERROR.
     (Andy Isaacson via eli)
 
+    HADOOP-8755. Print thread dump when tests fail due to timeout. (Andrey
+    Klochkov via atm)
+
   OPTIMIZATIONS
 
   BUG FIXES

+ 181 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/TestTimedOutTestsListener.java

@@ -0,0 +1,181 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.test;
+
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.util.concurrent.BrokenBarrierException;
+import java.util.concurrent.CyclicBarrier;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.notification.Failure;
+
+public class TestTimedOutTestsListener {
+
+  public static class Deadlock {
+    private CyclicBarrier barrier = new CyclicBarrier(6);
+  
+    public Deadlock() {
+      DeadlockThread[] dThreads = new DeadlockThread[6];
+  
+      Monitor a = new Monitor("a");
+      Monitor b = new Monitor("b");
+      Monitor c = new Monitor("c");
+      dThreads[0] = new DeadlockThread("MThread-1", a, b);
+      dThreads[1] = new DeadlockThread("MThread-2", b, c);
+      dThreads[2] = new DeadlockThread("MThread-3", c, a);
+  
+      Lock d = new ReentrantLock();
+      Lock e = new ReentrantLock();
+      Lock f = new ReentrantLock();
+  
+      dThreads[3] = new DeadlockThread("SThread-4", d, e);
+      dThreads[4] = new DeadlockThread("SThread-5", e, f);
+      dThreads[5] = new DeadlockThread("SThread-6", f, d);
+  
+      // make them daemon threads so that the test will exit
+      for (int i = 0; i < 6; i++) {
+        dThreads[i].setDaemon(true);
+        dThreads[i].start();
+      }
+    }
+  
+    class DeadlockThread extends Thread {
+      private Lock lock1 = null;
+  
+      private Lock lock2 = null;
+  
+      private Monitor mon1 = null;
+  
+      private Monitor mon2 = null;
+  
+      private boolean useSync;
+  
+      DeadlockThread(String name, Lock lock1, Lock lock2) {
+        super(name);
+        this.lock1 = lock1;
+        this.lock2 = lock2;
+        this.useSync = true;
+      }
+  
+      DeadlockThread(String name, Monitor mon1, Monitor mon2) {
+        super(name);
+        this.mon1 = mon1;
+        this.mon2 = mon2;
+        this.useSync = false;
+      }
+  
+      public void run() {
+        if (useSync) {
+          syncLock();
+        } else {
+          monitorLock();
+        }
+      }
+  
+      private void syncLock() {
+        lock1.lock();
+        try {
+          try {
+            barrier.await();
+          } catch (Exception e) {
+          }
+          goSyncDeadlock();
+        } finally {
+          lock1.unlock();
+        }
+      }
+  
+      private void goSyncDeadlock() {
+        try {
+          barrier.await();
+        } catch (Exception e) {
+        }
+        lock2.lock();
+        throw new RuntimeException("should not reach here.");
+      }
+  
+      private void monitorLock() {
+        synchronized (mon1) {
+          try {
+            barrier.await();
+          } catch (Exception e) {
+          }
+          goMonitorDeadlock();
+        }
+      }
+  
+      private void goMonitorDeadlock() {
+        try {
+          barrier.await();
+        } catch (Exception e) {
+        }
+        synchronized (mon2) {
+          throw new RuntimeException(getName() + " should not reach here.");
+        }
+      }
+    }
+  
+    class Monitor {
+      String name;
+  
+      Monitor(String name) {
+        this.name = name;
+      }
+    }
+  
+  }
+
+  @Test(timeout=500)
+  public void testThreadDumpAndDeadlocks() throws Exception {
+    new Deadlock();
+    String s = null;
+    while (true) {
+      s = TimedOutTestsListener.buildDeadlockInfo();
+      if (s != null)
+        break;
+      Thread.sleep(100);
+    }
+    
+    Assert.assertEquals(3, countStringOccurrences(s, "BLOCKED"));
+    
+    Failure failure = new Failure(
+        null, new Exception(TimedOutTestsListener.TEST_TIMED_OUT_PREFIX));
+    StringWriter writer = new StringWriter();
+    new TimedOutTestsListener(new PrintWriter(writer)).testFailure(failure);
+    String out = writer.toString();
+    
+    Assert.assertTrue(out.contains("THREAD DUMP"));
+    Assert.assertTrue(out.contains("DEADLOCKS DETECTED"));
+    
+    System.out.println(out);
+  }
+
+  private int countStringOccurrences(String s, String substr) {
+    int n = 0;
+    int index = 0;
+    while ((index = s.indexOf(substr, index) + 1) != 0) {
+      n++;
+    }
+    return n;
+  }
+
+}

+ 166 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/TimedOutTestsListener.java

@@ -0,0 +1,166 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.test;
+
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.lang.management.LockInfo;
+import java.lang.management.ManagementFactory;
+import java.lang.management.MonitorInfo;
+import java.lang.management.ThreadInfo;
+import java.lang.management.ThreadMXBean;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Map;
+
+import org.junit.runner.notification.Failure;
+import org.junit.runner.notification.RunListener;
+
+/**
+ * JUnit run listener which prints full thread dump into System.err
+ * in case a test is failed due to timeout.
+ */
+public class TimedOutTestsListener extends RunListener {
+
+  static final String TEST_TIMED_OUT_PREFIX = "test timed out after";
+  
+  private static String INDENT = "    ";
+
+  private final PrintWriter output;
+  
+  public TimedOutTestsListener() {
+    this.output = new PrintWriter(System.err);
+  }
+  
+  public TimedOutTestsListener(PrintWriter output) {
+    this.output = output;
+  }
+
+  @Override
+  public void testFailure(Failure failure) throws Exception {
+    if (failure != null && failure.getMessage() != null 
+        && failure.getMessage().startsWith(TEST_TIMED_OUT_PREFIX)) {
+      output.println("====> TEST TIMED OUT. PRINTING THREAD DUMP. <====");
+      output.println();
+      DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS");
+      output.println(String.format("Timestamp: %s", dateFormat.format(new Date())));
+      output.println();
+      output.println(buildThreadDump());
+      
+      String deadlocksInfo = buildDeadlockInfo();
+      if (deadlocksInfo != null) {
+        output.println("====> DEADLOCKS DETECTED <====");
+        output.println();
+        output.println(deadlocksInfo);
+      }
+    }
+  }
+
+  static String buildThreadDump() {
+    StringBuilder dump = new StringBuilder();
+    Map<Thread, StackTraceElement[]> stackTraces = Thread.getAllStackTraces();
+    for (Map.Entry<Thread, StackTraceElement[]> e : stackTraces.entrySet()) {
+      Thread thread = e.getKey();
+      dump.append(String.format(
+          "\"%s\" %s prio=%d tid=%d %s\njava.lang.Thread.State: %s",
+          thread.getName(),
+          (thread.isDaemon() ? "daemon" : ""),
+          thread.getPriority(),
+          thread.getId(),
+          Thread.State.WAITING.equals(thread.getState()) ? 
+              "in Object.wait()" : thread.getState().name().toLowerCase(),
+          Thread.State.WAITING.equals(thread.getState()) ? 
+              "WAITING (on object monitor)" : thread.getState()));
+      for (StackTraceElement stackTraceElement : e.getValue()) {
+        dump.append("\n        at ");
+        dump.append(stackTraceElement);
+      }
+      dump.append("\n");
+    }
+    return dump.toString();
+  }
+  
+  static String buildDeadlockInfo() {
+    ThreadMXBean threadBean = ManagementFactory.getThreadMXBean();
+    long[] threadIds = threadBean.findMonitorDeadlockedThreads();
+    if (threadIds != null && threadIds.length > 0) {
+      StringWriter stringWriter = new StringWriter();
+      PrintWriter out = new PrintWriter(stringWriter);
+      
+      ThreadInfo[] infos = threadBean.getThreadInfo(threadIds, true, true);
+      for (ThreadInfo ti : infos) {
+        printThreadInfo(ti, out);
+        printLockInfo(ti.getLockedSynchronizers(), out);
+        out.println();
+      }
+      
+      out.close();
+      return stringWriter.toString();
+    } else {
+      return null;
+    }
+  }
+  
+  private static void printThreadInfo(ThreadInfo ti, PrintWriter out) {
+    // print thread information
+    printThread(ti, out);
+
+    // print stack trace with locks
+    StackTraceElement[] stacktrace = ti.getStackTrace();
+    MonitorInfo[] monitors = ti.getLockedMonitors();
+    for (int i = 0; i < stacktrace.length; i++) {
+      StackTraceElement ste = stacktrace[i];
+      out.println(INDENT + "at " + ste.toString());
+      for (MonitorInfo mi : monitors) {
+        if (mi.getLockedStackDepth() == i) {
+          out.println(INDENT + "  - locked " + mi);
+        }
+      }
+    }
+    out.println();
+  }
+
+  private static void printThread(ThreadInfo ti, PrintWriter out) {
+    out.print("\"" + ti.getThreadName() + "\"" + " Id="
+        + ti.getThreadId() + " in " + ti.getThreadState());
+    if (ti.getLockName() != null) {
+      out.print(" on lock=" + ti.getLockName());
+    }
+    if (ti.isSuspended()) {
+      out.print(" (suspended)");
+    }
+    if (ti.isInNative()) {
+      out.print(" (running in native)");
+    }
+    out.println();
+    if (ti.getLockOwnerName() != null) {
+      out.println(INDENT + " owned by " + ti.getLockOwnerName() + " Id="
+          + ti.getLockOwnerId());
+    }
+  }
+
+  private static void printLockInfo(LockInfo[] locks, PrintWriter out) {
+    out.println(INDENT + "Locked synchronizers: count = " + locks.length);
+    for (LockInfo li : locks) {
+      out.println(INDENT + "  - " + li);
+    }
+    out.println();
+  }
+  
+}

+ 6 - 0
hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml

@@ -301,6 +301,12 @@
             <java.security.krb5.conf>${project.build.directory}/test-classes/krb5.conf</java.security.krb5.conf>
             <kerberos.realm>${kerberos.realm}</kerberos.realm>
           </systemPropertyVariables>
+          <properties>
+            <property>
+              <name>listener</name>
+              <value>org.apache.hadoop.test.TimedOutTestsListener</value>
+            </property>
+          </properties>
           <excludes>
             <exclude>**/${test.exclude}.java</exclude>
             <exclude>${test.exclude.pattern}</exclude>

+ 6 - 0
hadoop-hdfs-project/hadoop-hdfs/pom.xml

@@ -189,6 +189,12 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
             <startKdc>${startKdc}</startKdc>
             <kdc.resource.dir>${kdc.resource.dir}</kdc.resource.dir>
           </systemPropertyVariables>
+          <properties>
+            <property>
+              <name>listener</name>
+              <value>org.apache.hadoop.test.TimedOutTestsListener</value>
+            </property>
+          </properties>
         </configuration>
       </plugin>
       <plugin>

+ 12 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml

@@ -172,6 +172,18 @@
           <effort>Max</effort>
        </configuration>
      </plugin>
+     <plugin>
+       <groupId>org.apache.maven.plugins</groupId>
+       <artifactId>maven-surefire-plugin</artifactId>
+       <configuration>
+         <properties>
+           <property>
+             <name>listener</name>
+             <value>org.apache.hadoop.test.TimedOutTestsListener</value>
+           </property>
+         </properties>
+       </configuration>
+     </plugin>
     </plugins>
   </build>
  

+ 12 - 0
hadoop-mapreduce-project/pom.xml

@@ -220,6 +220,18 @@
           </includes>
         </configuration>
       </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <configuration>
+          <properties>
+            <property>
+              <name>listener</name>
+              <value>org.apache.hadoop.test.TimedOutTestsListener</value>
+            </property>
+          </properties>
+        </configuration>
+      </plugin>
     </plugins>
   </build>
 

+ 13 - 1
hadoop-yarn-project/hadoop-yarn/pom.xml

@@ -154,6 +154,18 @@
           </includes>
         </configuration>
       </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <configuration>
+          <properties>
+            <property>
+              <name>listener</name>
+              <value>org.apache.hadoop.test.TimedOutTestsListener</value>
+            </property>
+          </properties>
+        </configuration>
+      </plugin>
     </plugins>
   </build>
 
@@ -187,4 +199,4 @@
     <module>hadoop-yarn-site</module>
     <module>hadoop-yarn-client</module>
   </modules>
-</project>
+</project>