Browse source

HADOOP-1990 Regression test instability affects nightly and patch builds

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@581995 13f79535-47bb-0310-9956-ffa450edef68
Jim Kellerman, 18 years ago
Parent
commit
95a0b91f35

+ 1 - 0
src/contrib/hbase/CHANGES.txt

@@ -64,6 +64,7 @@ Trunk (unreleased changes)
     HADOOP-1941 StopRowFilter throws NPE when passed null row
     HADOOP-1966 Make HBase unit tests more reliable in the Hudson environment.
     HADOOP-1975 HBase tests failing with java.lang.NumberFormatException
+    HADOOP-1990 Regression test instability affects nightly and patch builds
 
   IMPROVEMENTS
     HADOOP-1737 Make HColumnDescriptor data publically members settable

+ 0 - 9
src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java

@@ -1049,15 +1049,6 @@ HMasterRegionInterface {
     } catch(Exception iex) {
       LOG.warn("meta scanner", iex);
     }
-    try {
-      // TODO: Maybe do in parallel in its own thread as is done in TaskTracker
-      // if its taking a long time to go down.
-      
-      server.join();                    // Wait for server to finish.
-    } catch(InterruptedException iex) {
-      LOG.warn("server", iex);
-    }
-
     LOG.info("HMaster main thread exiting");
   }
   

+ 0 - 6
src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java

@@ -729,12 +729,6 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
     join(this.logRollerThread);
     join(this.cacheFlusherThread);
     join(this.splitOrCompactCheckerThread);
-    try {
-      this.server.join();
-    } catch (InterruptedException e) {
-      // No means of asking server if its done... .so just assume it is even
-      // if an interrupt.
-    }
   }
 
   private void join(final Thread t) {

+ 7 - 0
src/contrib/hbase/src/java/org/apache/hadoop/hbase/HServerInfo.java

@@ -87,6 +87,13 @@ public class HServerInfo implements Writable {
     return startCode;
   }
   
+  /**
+   * @param startCode the startCode to set
+   */
+  public void setStartCode(long startCode) {
+    this.startCode = startCode;
+  }
+
   /** {@inheritDoc} */
   @Override
   public String toString() {

+ 2 - 3
src/contrib/hbase/src/java/org/apache/hadoop/hbase/util/FSUtils.java

@@ -20,7 +20,6 @@
 package org.apache.hadoop.hbase.util;
 
 import java.io.IOException;
-import java.util.concurrent.atomic.AtomicBoolean;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -59,11 +58,11 @@ public class FSUtils {
     } catch (IOException e) {
       exception = e.getMessage();
     }
-    LOG.info("Failed file system available test. Thread: " +
-        Thread.currentThread().getName() + ": " + exception);
     
     try {
       if (!available) {
+        LOG.info("Failed file system available test. Thread: " +
+            Thread.currentThread().getName() + ": " + exception);
         fs.close();
       }
         

+ 48 - 5
src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestDFSAbort.java → src/contrib/hbase/src/test/org/apache/hadoop/hbase/DFSAbort.java

@@ -22,14 +22,27 @@ package org.apache.hadoop.hbase;
 import junit.framework.TestSuite;
 import junit.textui.TestRunner;
 
-import org.apache.log4j.Level;
-import org.apache.log4j.Logger;
+import java.io.PrintWriter;
+import org.apache.hadoop.util.ReflectionUtils;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 
 /**
  * Test ability of HBase to handle DFS failure
  */
-public class TestDFSAbort extends HBaseClusterTestCase {
+public class DFSAbort extends HBaseClusterTestCase {
+  private static final Log LOG =
+    LogFactory.getLog(DFSAbort.class.getName());
 
+  /** constructor */
+  public DFSAbort() {
+    super();
+    
+    // For less frequently updated regions flush after every 2 flushes
+    conf.setInt("hbase.hregion.memcache.optionalflushcount", 2);
+  }
+  
   /** {@inheritDoc} */
   @Override
   public void setUp() throws Exception {
@@ -54,17 +67,47 @@ public class TestDFSAbort extends HBaseClusterTestCase {
       // created a table. Now let's yank the rug out from HBase
       cluster.getDFSCluster().shutdown();
       // Now wait for Mini HBase Cluster to shut down
-      cluster.join();
+//      cluster.join();
+      join();
     } catch (Exception e) {
       e.printStackTrace();
       throw e;
     }
   }
   
+  private void join() {
+    if (this.cluster.regionThreads != null) {
+      synchronized(this.cluster.regionThreads) {
+        for(Thread t: this.cluster.regionThreads) {
+          join(t);
+        }
+      }
+    }
+    join(this.cluster.getMasterThread());
+  }
+
+  private void join(final Thread t) {
+    if (t == null) {
+      return;
+    }
+    for (int i = 0; t.isAlive(); i++) {
+      try {
+        Thread.sleep(1000);
+      } catch (InterruptedException e) {
+        LOG.info("Continuing...", e);
+      }
+      if (i != 0 && i % 30 == 0) {
+        ReflectionUtils.printThreadInfo(new PrintWriter(System.out),
+            "Automatic Stack Trace every 30 seconds waiting on " +
+            t.getName());
+      }
+    }
+  }
+
   /**
    * @param args unused
    */
   public static void main(@SuppressWarnings("unused") String[] args) {
-    TestRunner.run(new TestSuite(TestDFSAbort.class));
+    TestRunner.run(new TestSuite(DFSAbort.class));
   }
 }

+ 9 - 0
src/contrib/hbase/src/test/org/apache/hadoop/hbase/MiniHBaseCluster.java

@@ -85,6 +85,7 @@ public class MiniHBaseCluster implements HConstants {
   /**
    * Starts a MiniHBaseCluster on top of an existing HDFSCluster
    *
+   *<pre>
    ****************************************************************************
    *            *  *  *  *  *  N O T E  *  *  *  *  *
    *
@@ -93,6 +94,7 @@ public class MiniHBaseCluster implements HConstants {
    *
    *            *  *  *  *  *  N O T E  *  *  *  *  *
    ****************************************************************************
+   *</pre>
    *
    * @param conf
    * @param nRegionNodes
@@ -286,6 +288,13 @@ public class MiniHBaseCluster implements HConstants {
     return this.masterThread.getMaster().getMasterAddress();
   }
 
+  /**
+   * @return the thread running the HMaster
+   */
+  public MasterThread getMasterThread() {
+    return this.masterThread;
+  }
+  
   /**
    * Cause a region server to exit without cleaning up
    *

+ 25 - 18
src/contrib/hbase/src/test/org/apache/hadoop/hbase/MultiRegionTable.java

@@ -55,6 +55,10 @@ public class MultiRegionTable extends HBaseTestCase {
       MiniHBaseCluster cluster, FileSystem localFs, String tableName,
       String columnName) throws IOException {
     
+    final int retries = 10; 
+    final long waitTime =
+      conf.getLong("hbase.master.meta.thread.rescanfrequency", 10L * 1000L);
+    
     // This size should make it so we always split using the addContent
     // below.  After adding all data, the first region is 1.3M. Should
     // set max filesize to be <= 1M.
@@ -62,7 +66,6 @@ public class MultiRegionTable extends HBaseTestCase {
     assertTrue(conf.getLong("hbase.hregion.max.filesize",
       HConstants.DEFAULT_MAX_FILE_SIZE) <= 1024 * 1024);
 
-    final int retries = 10; 
     FileSystem fs = (cluster.getDFSCluster() == null) ?
       localFs : cluster.getDFSCluster().getFileSystem();
     assertNotNull(fs);
@@ -89,18 +92,18 @@ public class MultiRegionTable extends HBaseTestCase {
     
     // Now, wait until split makes it into the meta table.
     
-    for (int i = 0;
-      i < retries && (count(meta, HConstants.COLUMN_FAMILY_STR) <= count);
-      i++) {
-      
+    int oldCount = count;
+    for (int i = 0; i < retries;  i++) {
+      count = count(meta, HConstants.COLUMN_FAMILY_STR);
+      if (count > oldCount) {
+        break;
+      }
       try {
-        Thread.sleep(5000);
+        Thread.sleep(waitTime);
       } catch (InterruptedException e) {
         // continue
       }
     }
-    int oldCount = count;
-    count = count(meta, HConstants.COLUMN_FAMILY_STR);
     if (count <= oldCount) {
       throw new IOException("Failed waiting on splits to show up");
     }
@@ -126,7 +129,7 @@ public class MultiRegionTable extends HBaseTestCase {
     
     // Recalibrate will cause us to wait on new regions' deployment
     
-    recalibrate(t, new Text(columnName), retries);
+    recalibrate(t, new Text(columnName), retries, waitTime);
     
     // Compact a region at a time so we can test case where one region has
     // no references but the other still has some
@@ -138,7 +141,7 @@ public class MultiRegionTable extends HBaseTestCase {
     
     while (getSplitParentInfo(meta, parent).size() == 3) {
       try {
-        Thread.sleep(5000);
+        Thread.sleep(waitTime);
       } catch (InterruptedException e) {
         // continue
       }
@@ -153,12 +156,13 @@ public class MultiRegionTable extends HBaseTestCase {
     // Now wait until parent disappears.
     
     LOG.info("Waiting on parent " + parent.getRegionName() + " to disappear");
-    for (int i = 0;
-      i < retries && getSplitParentInfo(meta, parent) != null;
-      i++) {
+    for (int i = 0; i < retries; i++) {
+      if (getSplitParentInfo(meta, parent) == null) {
+        break;
+      }
       
       try {
-        Thread.sleep(5000);
+        Thread.sleep(waitTime);
       } catch (InterruptedException e) {
         // continue
       }
@@ -167,9 +171,12 @@ public class MultiRegionTable extends HBaseTestCase {
     
     // Assert cleaned up.
     
-    for (int i = 0; i < retries && fs.exists(parentDir); i++) {
+    for (int i = 0; i < retries; i++) {
+      if (!fs.exists(parentDir)) {
+        break;
+      }
       try {
-        Thread.sleep(5000);
+        Thread.sleep(waitTime);
       } catch (InterruptedException e) {
         // continue
       }
@@ -243,7 +250,7 @@ public class MultiRegionTable extends HBaseTestCase {
    * @param retries
    */
   private static void recalibrate(final HTable t, final Text column,
-      final int retries) throws IOException {
+      final int retries, final long waitTime) throws IOException {
     
     for (int i = 0; i < retries; i++) {
       try {
@@ -260,7 +267,7 @@ public class MultiRegionTable extends HBaseTestCase {
       } catch (NotServingRegionException x) {
         System.out.println("it's alright");
         try {
-          Thread.sleep(5000);
+          Thread.sleep(waitTime);
         } catch (InterruptedException e) {
           // continue
         }

+ 15 - 15
src/contrib/hbase/src/test/org/apache/hadoop/hbase/StaticTestEnvironment.java

@@ -86,25 +86,25 @@ public class StaticTestEnvironment {
       } else if(value.equalsIgnoreCase("WARN")) {
         logLevel = Level.WARN;
       }
+    }
 
-      ConsoleAppender consoleAppender = null;
-      for(Enumeration<Appender> e = rootLogger.getAllAppenders();
-          e.hasMoreElements();) {
+    ConsoleAppender consoleAppender = null;
+    for(Enumeration<Appender> e = rootLogger.getAllAppenders();
+    e.hasMoreElements();) {
 
-        Appender a = e.nextElement();
-        if(a instanceof ConsoleAppender) {
-          consoleAppender = (ConsoleAppender)a;
-          break;
-        }
+      Appender a = e.nextElement();
+      if(a instanceof ConsoleAppender) {
+        consoleAppender = (ConsoleAppender)a;
+        break;
       }
-      if(consoleAppender != null) {
-        Layout layout = consoleAppender.getLayout();
-        if(layout instanceof PatternLayout) {
-          PatternLayout consoleLayout = (PatternLayout)layout;
-          consoleLayout.setConversionPattern("%d %-5p [%t] %l: %m%n");
-        }
+    }
+    if(consoleAppender != null) {
+      Layout layout = consoleAppender.getLayout();
+      if(layout instanceof PatternLayout) {
+        PatternLayout consoleLayout = (PatternLayout)layout;
+        consoleLayout.setConversionPattern("%d %-5p [%t] %l: %m%n");
       }
-    }    
+    }
     Logger.getLogger(
         HBaseTestCase.class.getPackage().getName()).setLevel(logLevel);
   }

+ 4 - 0
src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestLogRolling.java

@@ -68,6 +68,10 @@ public class TestLogRolling extends HBaseTestCase {
       // Increase the amount of time between client retries
       conf.setLong("hbase.client.pause", 15 * 1000);
 
+      // Reduce thread wake frequency so that other threads can get
+      // a chance to run.
+      conf.setInt(HConstants.THREAD_WAKE_FREQUENCY, 2 * 1000);
+      
       String className = this.getClass().getName();
       StringBuilder v = new StringBuilder(className);
       while (v.length() < 1000) {