Browse Source

HADOOP-2338 Fix NullPointerException in master server.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@602226 13f79535-47bb-0310-9956-ffa450edef68
Jim Kellerman 17 years ago
parent
commit
86d65a0ac2

+ 1 - 0
src/contrib/hbase/CHANGES.txt

@@ -60,6 +60,7 @@ Trunk (unreleased changes)
                (Bryan Duxbury via Stack)
    HADOOP-2365 Result of HashFunction.hash() contains all identical values
    HADOOP-2362 Leaking hdfs file handle on region split
+   HADOOP-2338 Fix NullPointerException in master server.
 
   IMPROVEMENTS
    HADOOP-2401 Add convenience put method that takes writable

File diff suppressed because it is too large
+ 374 - 327
src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java


+ 15 - 1
src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMsg.java

@@ -48,6 +48,9 @@ public class HMsg implements Writable {
 
   /** Stop serving the specified region and don't report back that it's closed */
   public static final byte MSG_REGION_CLOSE_WITHOUT_REPORT = 6;
+  
+  /** Stop serving user regions */
+  public static final byte MSG_REGIONSERVER_QUIESCE = 7;
 
   // Messages sent from the region server to the master
   
@@ -72,9 +75,12 @@ public class HMsg implements Writable {
    * region server is shutting down
    * 
    * note that this message is followed by MSG_REPORT_CLOSE messages for each
-   * region the region server was serving.
+   * region the region server was serving, unless it was told to quiesce.
    */
   public static final byte MSG_REPORT_EXITING = 104;
+  
+  /** region server has closed all user regions but is still serving meta regions */
+  public static final byte MSG_REPORT_QUIESCED = 105;
 
   byte msg;
   HRegionInfo info;
@@ -148,6 +154,10 @@ public class HMsg implements Writable {
       message.append("MSG_REGION_CLOSE_WITHOUT_REPORT : ");
       break;
       
+    case MSG_REGIONSERVER_QUIESCE:
+      message.append("MSG_REGIONSERVER_QUIESCE : ");
+      break;
+      
     case MSG_REPORT_PROCESS_OPEN:
       message.append("MSG_REPORT_PROCESS_OPEN : ");
       break;
@@ -168,6 +178,10 @@ public class HMsg implements Writable {
       message.append("MSG_REPORT_EXITING : ");
       break;
       
+    case MSG_REPORT_QUIESCED:
+      message.append("MSG_REPORT_QUIESCED : ");
+      break;
+      
     default:
       message.append("unknown message code (");
       message.append(msg);

+ 15 - 0
src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionInfo.java

@@ -193,6 +193,21 @@ public class HRegionInfo implements WritableComparable {
     return tableDesc;
   }
   
+  /** @return true if this is the root region */
+  public boolean isRootRegion() {
+    return this.tableDesc.isRootRegion();
+  }
+  
+  /** @return true if this is the meta table */
+  public boolean isMetaTable() {
+    return this.tableDesc.isMetaTable();
+  }
+
+  /** @return true if this region is a meta region */
+  public boolean isMetaRegion() {
+    return this.tableDesc.isMetaRegion();
+  }
+  
   /**
    * @return True if has been split and has daughters.
    */

+ 82 - 10
src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java

@@ -81,6 +81,8 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
   // Chore threads need to know about the hosting class.
   protected final AtomicBoolean stopRequested = new AtomicBoolean(false);
   
+  protected final AtomicBoolean quiesced = new AtomicBoolean(false);
+  
   // Go down hard.  Used if file system becomes unavailable and also in
   // debugging and unit tests.
   protected volatile boolean abortRequested;
@@ -652,6 +654,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
    * load/unload instructions.
    */
   public void run() {
+    boolean quiesceRequested = false;
     try {
       init(reportForDuty());
       long lastMsg = 0;
@@ -682,6 +685,16 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
               HMsg msgs[] =
                 this.hbaseMaster.regionServerReport(serverInfo, outboundArray);
               lastMsg = System.currentTimeMillis();
+              
+              if (this.quiesced.get() && onlineRegions.size() == 0) {
+                // We've just told the master we're exiting because we aren't
+                // serving any regions. So set the stop bit and exit.
+                LOG.info("Server quiesced and not serving any regions. " +
+                    "Starting shutdown");
+                stopRequested.set(true);
+                continue;
+              }
+              
               // Queue up the HMaster's instruction stream for processing
               boolean restart = false;
               for(int i = 0; i < msgs.length && !stopRequested.get() &&
@@ -689,9 +702,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
                 switch(msgs[i].getMsg()) {
                 
                 case HMsg.MSG_CALL_SERVER_STARTUP:
-                  if (LOG.isDebugEnabled()) {
-                    LOG.debug("Got call server startup message");
-                  }
+                  LOG.info("Got call server startup message");
                   // We get the MSG_CALL_SERVER_STARTUP on startup but we can also
                   // get it when the master is panicking because for instance
                   // the HDFS has been yanked out from under it.  Be wary of
@@ -725,11 +736,22 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
                   break;
 
                 case HMsg.MSG_REGIONSERVER_STOP:
-                  if (LOG.isDebugEnabled()) {
-                    LOG.debug("Got regionserver stop message");
-                  }
+                  LOG.info("Got regionserver stop message");
                   stopRequested.set(true);
                   break;
+                  
+                case HMsg.MSG_REGIONSERVER_QUIESCE:
+                  if (!quiesceRequested) {
+                    LOG.info("Got quiesce server message");
+                    try {
+                      toDo.put(new ToDoEntry(msgs[i]));
+                    } catch (InterruptedException e) {
+                      throw new RuntimeException("Putting into msgQueue was " +
+                        "interrupted.", e);
+                    }
+                    quiesceRequested = true;
+                  }
+                  break;
 
                 default:
                   if (fsOk) {
@@ -1101,6 +1123,10 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
           try {
             LOG.info(e.msg.toString());
             switch(e.msg.getMsg()) {
+            
+            case HMsg.MSG_REGIONSERVER_QUIESCE:
+              closeUserRegions();
+              break;
 
             case HMsg.MSG_REGION_OPEN:
               // Open a region
@@ -1149,12 +1175,19 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
     }
   }
   
-  void openRegion(final HRegionInfo regionInfo) throws IOException {
+  void openRegion(final HRegionInfo regionInfo) {
     HRegion region = onlineRegions.get(regionInfo.getRegionName());
     if(region == null) {
-      region = new HRegion(new Path(this.conf.get(HConstants.HBASE_DIR)),
-        this.log, FileSystem.get(conf), conf, regionInfo, null,
-        this.cacheFlusher);
+      try {
+        region = new HRegion(new Path(this.conf.get(HConstants.HBASE_DIR)),
+            this.log, FileSystem.get(conf), conf, regionInfo, null,
+            this.cacheFlusher);
+        
+      } catch (IOException e) {
+        LOG.error("error opening region " + regionInfo.getRegionName(), e);
+        reportClose(region);
+        return;
+      }
       this.lock.writeLock().lock();
       try {
         this.log.setSequenceNumber(region.getMinSequenceId());
@@ -1208,6 +1241,45 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
     return regionsToClose;
   }
 
+  /** Called as the first stage of cluster shutdown. */
+  void closeUserRegions() {
+    ArrayList<HRegion> regionsToClose = new ArrayList<HRegion>();
+    this.lock.writeLock().lock();
+    try {
+      synchronized (onlineRegions) {
+        for (Iterator<Map.Entry<Text, HRegion>> i =
+          onlineRegions.entrySet().iterator();
+        i.hasNext();) {
+          Map.Entry<Text, HRegion> e = i.next();
+          HRegion r = e.getValue();
+          if (!r.getRegionInfo().isMetaRegion()) {
+            regionsToClose.add(r);
+            i.remove();
+          }
+        }
+      }
+    } finally {
+      this.lock.writeLock().unlock();
+    }
+    for(HRegion region: regionsToClose) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("closing region " + region.getRegionName());
+      }
+      try {
+        region.close(false);
+      } catch (IOException e) {
+        LOG.error("error closing region " + region.getRegionName(),
+          RemoteExceptionHandler.checkIOException(e));
+      }
+    }
+    this.quiesced.set(true);
+    if (onlineRegions.size() == 0) {
+      outboundMsgs.add(new HMsg(HMsg.MSG_REPORT_EXITING));
+    } else {
+      outboundMsgs.add(new HMsg(HMsg.MSG_REPORT_QUIESCED));
+    }
+  }
+
   //
   // HRegionInterface
   //

+ 27 - 3
src/contrib/hbase/src/java/org/apache/hadoop/hbase/HTableDescriptor.java

@@ -52,7 +52,8 @@ public class HTableDescriptor implements WritableComparable {
             HColumnDescriptor.CompressionType.NONE, false, Integer.MAX_VALUE,
             null));
   
-
+  private boolean rootregion;
+  private boolean metaregion;
   private Text name;
   // TODO: Does this need to be a treemap?  Can it be a HashMap?
   private final TreeMap<Text, HColumnDescriptor> families;
@@ -69,6 +70,8 @@ public class HTableDescriptor implements WritableComparable {
 
   /** Used to construct the table descriptors for root and meta tables */
   private HTableDescriptor(Text name, HColumnDescriptor family) {
+    rootregion = name.equals(HConstants.ROOT_TABLE_NAME);
+    this.metaregion = true;
     this.name = new Text(name);
     this.families = new TreeMap<Text, HColumnDescriptor>();
     families.put(family.getName(), family);
@@ -92,13 +95,30 @@ public class HTableDescriptor implements WritableComparable {
    * <code>[a-zA-Z_0-9]</code>
    */
   public HTableDescriptor(String name) {
+    this();
     Matcher m = LEGAL_TABLE_NAME.matcher(name);
     if (m == null || !m.matches()) {
       throw new IllegalArgumentException(
           "Table names can only contain 'word characters': i.e. [a-zA-Z_0-9");
     }
-    this.name = new Text(name);
-    this.families = new TreeMap<Text, HColumnDescriptor>();
+    this.name.set(name);
+    this.rootregion = false;
+    this.metaregion = false;
+  }
+  
+  /** @return true if this is the root region */
+  public boolean isRootRegion() {
+    return rootregion;
+  }
+  
+  /** @return true if table is the meta table */
+  public boolean isMetaTable() {
+    return metaregion && !rootregion;
+  }
+  
+  /** @return true if this is a meta region (part of the root or meta tables) */
+  public boolean isMetaRegion() {
+    return metaregion;
   }
 
   /** @return name of table */
@@ -165,6 +185,8 @@ public class HTableDescriptor implements WritableComparable {
 
   /** {@inheritDoc} */
   public void write(DataOutput out) throws IOException {
+    out.writeBoolean(rootregion);
+    out.writeBoolean(metaregion);
     name.write(out);
     out.writeInt(families.size());
     for(Iterator<HColumnDescriptor> it = families.values().iterator();
@@ -175,6 +197,8 @@ public class HTableDescriptor implements WritableComparable {
 
   /** {@inheritDoc} */
   public void readFields(DataInput in) throws IOException {
+    this.rootregion = in.readBoolean();
+    this.metaregion = in.readBoolean();
     this.name.readFields(in);
     int numCols = in.readInt();
     families.clear();

Some files were not shown because too many files changed in this diff