
HADOOP-2490 Failure in nightly #346
Add one fix and more logging to help diagnose the failures up on hudson.


git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@610237 13f79535-47bb-0310-9956-ffa450edef68

Michael Stack, 17 years ago
parent
commit 1cb8c48110

+ 1 - 0
src/contrib/hbase/CHANGES.txt

@@ -105,6 +105,7 @@ Trunk (unreleased changes)
    HADOOP-2507 REST servlet does not properly base64 row keys and column names
                (Bryan Duxbury via Stack)
    HADOOP-2530 Missing type in new hbase custom RPC serializer
+   HADOOP-2490 Failure in nightly #346 (Added debugging of hudson failures).
    
   IMPROVEMENTS
    HADOOP-2401 Add convenience put method that takes writable

+ 11 - 6
src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java

@@ -462,12 +462,17 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
                   !pendingRegions.contains(info.getRegionName())
               )
           )
-      ) {
+        ) {
 
         // The current assignment is no good
         if (LOG.isDebugEnabled()) {
           LOG.debug("Current assignment of " + info.getRegionName() +
-          " is no good");
+            " is no good: storedInfo: " + storedInfo + ", startCode: " +
+            startCode + ", storedInfo.startCode: " +
+            ((storedInfo != null)? storedInfo.getStartCode(): -1) +
+            ", unassignedRegions: " + unassignedRegions.containsKey(info) +
+            ", pendingRegions: " +
+            pendingRegions.contains(info.getRegionName()));
         }
         // Recover the region server's log if there is one.
         // This is only done from here if we are restarting and there is stale
@@ -1026,9 +1031,7 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
     final String threadName = "HMaster";
     Thread.currentThread().setName(threadName);
     startServiceThreads();
-    /*
-     * Main processing loop
-     */
+    /* Main processing loop */
     try {
       for (RegionServerOperation op = null; !closed.get(); ) {
         if (shutdownRequested && serversToServerInfo.size() == 0) {
@@ -1037,7 +1040,6 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
         }
         if (rootRegionLocation.get() != null) {
           // We can't process server shutdowns unless the root region is online 
-
           op = this.delayedToDoQueue.poll();
         }
         if (op == null ) {
@@ -1179,6 +1181,9 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
       this.closed.set(true);
       LOG.error("Failed startup", e);
     }
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Started service threads");
+    }
   }
 
   /*

+ 2 - 1
src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java

@@ -1057,7 +1057,8 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
    */
   private MapWritable reportForDuty() throws IOException {
     if (LOG.isDebugEnabled()) {
-      LOG.debug("Telling master we are up");
+      LOG.debug("Telling master at " +
+        conf.get(MASTER_ADDRESS) + " that we are up");
     }
     // Do initial RPC setup.
     this.hbaseMaster = (HMasterRegionInterface)HbaseRPC.waitForProxy(

+ 22 - 1
src/contrib/hbase/src/java/org/apache/hadoop/hbase/LocalHBaseCluster.java

@@ -20,6 +20,7 @@
 package org.apache.hadoop.hbase;
 
 import java.io.IOException;
+import java.io.PrintWriter;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
@@ -27,6 +28,7 @@ import java.util.List;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.ReflectionUtils;
 
 /**
  * This class creates a single process HBase cluster. One thread is created for
@@ -229,7 +231,10 @@ public class LocalHBaseCluster implements HConstants {
     if (this.master != null) {
       while (this.master.isAlive()) {
         try {
-          this.master.join();
+          // The plain join below has been replaced to help debug hangs
+          // that sometimes happen at the end of tests.
+          // this.master.join();
+          threadDumpingJoin(this.master);
         } catch(InterruptedException e) {
           // continue
         }
@@ -240,6 +245,22 @@ public class LocalHBaseCluster implements HConstants {
       " " + this.regionThreads.size() + " region server(s)");
   }
 
+  public void threadDumpingJoin(final Thread t) throws InterruptedException {
+    if (t == null) {
+      return;
+    }
+    long startTime = System.currentTimeMillis();
+    while (t.isAlive()) {
+      Thread.sleep(1000);
+      if (System.currentTimeMillis() - startTime > 60000) {
+        startTime = System.currentTimeMillis();
+        ReflectionUtils.printThreadInfo(new PrintWriter(System.out),
+            "Automatic Stack Trace every 60 seconds waiting on " +
+            t.getName());
+      }
+    }
+  }
+
   /**
    * Changes <code>hbase.master</code> from 'local' to 'localhost:PORT' in
    * passed Configuration instance.

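The added threadDumpingJoin polls the joined thread once a second and, whenever it has waited more than sixty seconds, prints a full thread dump so a hung shutdown leaves a trace in the test output. A minimal, self-contained sketch of the same idea using only JDK APIs (Thread.getAllStackTraces instead of Hadoop's ReflectionUtils) might look like the following; the class and method names are illustrative, not part of the patch.

import java.util.Map;

public class ThreadDumpingJoinSketch {
  /** Join t, printing every thread's stack once a minute while t stays alive. */
  public static void threadDumpingJoin(final Thread t) throws InterruptedException {
    if (t == null) {
      return;
    }
    long lastDump = System.currentTimeMillis();
    while (t.isAlive()) {
      Thread.sleep(1000);
      if (System.currentTimeMillis() - lastDump > 60000) {
        lastDump = System.currentTimeMillis();
        System.out.println("Still waiting on " + t.getName() + "; stack traces:");
        for (Map.Entry<Thread, StackTraceElement[]> e :
            Thread.getAllStackTraces().entrySet()) {
          System.out.println(e.getKey());
          for (StackTraceElement frame : e.getValue()) {
            System.out.println("    at " + frame);
          }
        }
      }
    }
  }
}
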
+ 5 - 0
src/contrib/hbase/src/java/org/apache/hadoop/hbase/io/TextSequence.java

@@ -37,6 +37,11 @@ import org.apache.hadoop.io.WritableComparator;
  * 
  * <p>Equals considers a Text equal if the TextSequence brackets the same bytes.
  * 
+ * <p>TextSequence will not always work as a Text.  For instance, the following
+ * fails: <code>Text c = new Text(new TextSequence(new Text("some string")));
+ * </code> because the Text constructor accesses private Text data members
+ * when making the new instance from the passed 'Text'.
+ * 
  * <p>TODO: Should this be an Interface as CharSequence is?
  */
 public class TextSequence extends Text {

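The pitfall the new javadoc warns about is general: a copy constructor that reads another instance's fields directly bypasses whatever overridden accessors a subclass provides, which is why constructing a Text from a TextSequence does not copy the bracketed bytes. The sketch below (plain Java with assumed names, not the HBase classes) reproduces the same failure mode.

class Holder {
  protected byte[] bytes;

  Holder(byte[] b) {
    this.bytes = b;
  }

  // Copies from the field, not from getBytes(), so a Slice passed here is
  // copied as its full backing array rather than as the slice it presents.
  Holder(Holder other) {
    this.bytes = other.bytes.clone();
  }

  byte[] getBytes() {
    return bytes;
  }
}

class Slice extends Holder {
  private final int off;
  private final int len;

  Slice(Holder h, int off, int len) {
    super(h.getBytes());
    this.off = off;
    this.len = len;
  }

  @Override
  byte[] getBytes() {
    byte[] out = new byte[len];
    System.arraycopy(bytes, off, out, 0, len);
    return out;
  }
}

Here new Holder(new Slice(h, 2, 3)) ends up holding the slice's entire backing array instead of the three bytes the slice presents, mirroring why new Text(new TextSequence(...)) does not behave as expected.
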
+ 10 - 0
src/contrib/hbase/src/test/hbase-site.xml

@@ -116,4 +116,14 @@
   	<value>/hbase</value>
   	<description>location of HBase instance in dfs</description>
   </property>
+  <property>
+    <name>hbase.hregion.max.filesize</name>
+    <value>67108864</value>
+    <description>
+    Maximum desired file size for an HRegion.  If filesize exceeds
+    value + (value / 2), the HRegion is split in two.  Default: 256M.
+
+    Keep the maximum filesize small so we split more often in tests.
+    </description>
+  </property>
 </configuration>

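With the test value of 67108864 bytes (64 MiB), the split threshold of value + (value / 2) works out to 100663296 bytes (96 MiB), versus roughly 384 MiB under the 256M default, so test tables cross the split point much sooner and the multi-region code paths actually get exercised.
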
+ 7 - 1
src/contrib/hbase/src/test/org/apache/hadoop/hbase/StaticTestEnvironment.java

@@ -138,7 +138,13 @@ public class StaticTestEnvironment {
       }
 
       LOG.info("Shutting down Mini DFS ");
-      cluster.shutdown();
+      try {
+        cluster.shutdown();
+      } catch (Exception e) {
+        // Can get a java.lang.reflect.UndeclaredThrowableException thrown
+        // here because of an InterruptedException. Don't let exceptions
+        // here cause a test failure.
+      }
     }
   }
 }

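The comment added above notes that MiniDFSCluster.shutdown() can surface an InterruptedException wrapped in a java.lang.reflect.UndeclaredThrowableException. A small illustrative helper in the same spirit (assumed names, not part of the patch) that logs and swallows such teardown failures could look like this:

import java.lang.reflect.UndeclaredThrowableException;

final class QuietShutdown {
  /** Run a shutdown action, logging and ignoring any failure it throws. */
  static void shutdownQuietly(final Runnable shutdown) {
    try {
      shutdown.run();
    } catch (UndeclaredThrowableException e) {
      // Reflection proxies wrap checked exceptions such as InterruptedException.
      System.err.println("Ignoring shutdown failure: " + e.getUndeclaredThrowable());
    } catch (RuntimeException e) {
      System.err.println("Ignoring shutdown failure: " + e);
    }
  }
}

It would be invoked from a test teardown as, for example, QuietShutdown.shutdownQuietly(new Runnable() { public void run() { cluster.shutdown(); } }).
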
+ 15 - 15
src/contrib/hbase/src/test/org/apache/hadoop/hbase/mapred/TestTableMapReduce.java

@@ -122,6 +122,8 @@ public class TestTableMapReduce extends MultiRegionTable {
       dir = new Path("/hbase");
       fs.mkdirs(dir);
       // Start up HBase cluster
+      // Only one region server.  MultiRegionServer manufacturing code below
+      // depends on there being one region server only.
       hCluster = new MiniHBaseCluster(conf, 1, dfsCluster);
       LOG.info("Master is at " + this.conf.get(HConstants.MASTER_ADDRESS));
     } catch (Exception e) {
@@ -235,7 +237,8 @@ public class TestTableMapReduce extends MultiRegionTable {
         }
       }
 
-      LOG.info("Print table contents before map/reduce");
+      LOG.info("Print table contents before map/reduce for " +
+        SINGLE_REGION_TABLE_NAME);
       scanTable(SINGLE_REGION_TABLE_NAME, true);
 
       @SuppressWarnings("deprecation")
@@ -252,19 +255,18 @@ public class TestTableMapReduce extends MultiRegionTable {
 
         TableReduce.initJob(SINGLE_REGION_TABLE_NAME,
             IdentityTableReduce.class, jobConf);
-
+        LOG.info("Started " + SINGLE_REGION_TABLE_NAME);
         JobClient.runJob(jobConf);
+        
+        LOG.info("Print table contents after map/reduce for " +
+          SINGLE_REGION_TABLE_NAME);
+        scanTable(SINGLE_REGION_TABLE_NAME, true);
 
+        // verify map-reduce results
+        verify(SINGLE_REGION_TABLE_NAME);
       } finally {
         mrCluster.shutdown();
       }
-    
-      LOG.info("Print table contents after map/reduce");
-      scanTable(SINGLE_REGION_TABLE_NAME, true);
-
-      // verify map-reduce results
-      verify(SINGLE_REGION_TABLE_NAME);
-
     } finally {
       table.close();
     }
@@ -307,16 +309,14 @@ public class TestTableMapReduce extends MultiRegionTable {
 
         TableReduce.initJob(MULTI_REGION_TABLE_NAME,
             IdentityTableReduce.class, jobConf);
-
+        LOG.info("Started " + MULTI_REGION_TABLE_NAME);
         JobClient.runJob(jobConf);
-
+        
+        // verify map-reduce results
+        verify(MULTI_REGION_TABLE_NAME);
       } finally {
         mrCluster.shutdown();
       }
-
-      // verify map-reduce results
-      verify(MULTI_REGION_TABLE_NAME);
-      
     } finally {
       table.close();
     }