Browse Source

HADOOP-1460 On shutdown IOException with complaint 'Cannot cancel lease that is not held'

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@544512 13f79535-47bb-0310-9956-ffa450edef68
Jim Kellerman 18 years ago
parent
commit
c9d92755e4

+ 2 - 0
src/contrib/hbase/CHANGES.txt

@@ -23,3 +23,5 @@ Trunk (unreleased changes)
  12. HADOOP-1392. Part2: includes table compaction by merging adjacent regions
      that have shrunk in size.
  13. HADOOP-1445 Support updates across region splits and compactions
+ 14. HADOOP-1460 On shutdown IOException with complaint 'Cannot cancel lease
+     that is not held'

+ 0 - 50
src/contrib/hbase/conf/hbase-site.xml

@@ -1,54 +1,4 @@
 <?xml version="1.0"?>
 <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 <configuration>
-  <property>
-    <name>hbase.regiondir</name>
-    <value>hbase</value>
-    <description>The directory shared by region servers.
-    </description>
-  </property>
-  <property>
-    <name>hbase.regionserver.msginterval</name>
-    <value>1000</value>
-    <description>Interval between messages from the RegionServer to HMaster
-    in milliseconds.  Default is 15. Set this value low if you want unit
-    tests to be responsive.
-    </description>
-  </property>
-  <!--
-  <property>
-    <name>hbase.master.meta.thread.rescanfrequency</name>
-    <value>600000</value>
-    <description>How long the HMaster sleeps (in milliseconds) between scans of
-    the root and meta tables.
-    </description>
-  </property>
-  <property>
-    <name>hbase.master.lease.period</name>
-    <value>360000</value>
-    <description>HMaster server lease period in milliseconds. Default is
-    180 seconds.</description>
-  </property>
-  <property>
-    <name>hbase.regionserver.lease.period</name>
-    <value>360000</value>
-    <description>HMaster server lease period in milliseconds. Default is
-    180 seconds.</description>
-  </property>
-  -->
-  <!--
-  <property>
-    <name>hbase.hregion.max.filesize</name>
-    <value>3421223</value>
-    <description>
-    Maximum desired file size for an HRegion.  If filesize exceeds
-    value + (value / 2), the HRegion is split in two.  Default: 128M.
-    </description>
-  </property>
-  <property>
-    <name>hbase.client.timeout.length</name>
-    <value>10000</value>
-    <description>Client timeout in milliseconds</description>
-  </property>
-  -->
 </configuration>

+ 26 - 20
src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java

@@ -738,17 +738,17 @@ public class HMaster implements HConstants, HMasterInterface,
   
   /** HRegionServers call this method upon startup. */
   public void regionServerStartup(HServerInfo serverInfo) throws IOException {
-    String server = serverInfo.getServerAddress().toString().trim();
+    String s = serverInfo.getServerAddress().toString().trim();
     HServerInfo storedInfo = null;
 
     if(LOG.isDebugEnabled()) {
-      LOG.debug("received start message from: " + server);
+      LOG.debug("received start message from: " + s);
     }
     
     // If we get the startup message but there's an old server by that
     // name, then we can timeout the old one right away and register
     // the new one.
-    storedInfo = serversToServerInfo.remove(server);
+    storedInfo = serversToServerInfo.remove(s);
 
     if(storedInfo != null && !closed) {
       synchronized(msgQueue) {
@@ -759,34 +759,40 @@ public class HMaster implements HConstants, HMasterInterface,
 
     // Either way, record the new server
 
-    serversToServerInfo.put(server, serverInfo);
+    serversToServerInfo.put(s, serverInfo);
 
     if(!closed) {
-      Text serverLabel = new Text(server);        
-      serverLeases.createLease(serverLabel, serverLabel, new ServerExpirer(server));
+      Text serverLabel = new Text(s);
+      LOG.debug("Created lease for " + serverLabel);
+      serverLeases.createLease(serverLabel, serverLabel, new ServerExpirer(s));
     }
   }
 
   /** HRegionServers call this method repeatedly. */
-  public HMsg[] regionServerReport(HServerInfo serverInfo, HMsg msgs[]) throws IOException {
-    String server = serverInfo.getServerAddress().toString().trim();
-    Text serverLabel = new Text(server);
-
-    if(closed
-        || (msgs.length == 1 && msgs[0].getMsg() == HMsg.MSG_REPORT_EXITING)) {
-      // We're shutting down. Or the HRegionServer is.
-      serversToServerInfo.remove(server);
-      serverLeases.cancelLease(serverLabel, serverLabel);
+  public HMsg[] regionServerReport(HServerInfo serverInfo, HMsg msgs[])
+  throws IOException {
+    String s = serverInfo.getServerAddress().toString().trim();
+    Text serverLabel = new Text(s);
+
+    if (closed ||
+        msgs.length == 1 && msgs[0].getMsg() == HMsg.MSG_REPORT_EXITING) {
+      // HRegionServer is shutting down.
+      if (serversToServerInfo.remove(s) != null) {
+        // Only cancel lease once (This block can run a couple of times during
+        // shutdown).
+        LOG.debug("Cancelling lease for " + serverLabel);
+        serverLeases.cancelLease(serverLabel, serverLabel);
+      }
       HMsg returnMsgs[] = {new HMsg(HMsg.MSG_REGIONSERVER_STOP)};
       return returnMsgs;
     }
 
-    HServerInfo storedInfo = serversToServerInfo.get(server);
+    HServerInfo storedInfo = serversToServerInfo.get(s);
 
     if(storedInfo == null) {
 
       if(LOG.isDebugEnabled()) {
-        LOG.debug("received server report from unknown server: " + server);
+        LOG.debug("received server report from unknown server: " + s);
       }
 
       // The HBaseMaster may have been restarted.
@@ -808,7 +814,7 @@ public class HMaster implements HConstants, HMasterInterface,
       // The answer is to ask A to shut down for good.
 
       if(LOG.isDebugEnabled()) {
-        LOG.debug("region server race condition detected: " + server);
+        LOG.debug("region server race condition detected: " + s);
       }
 
       HMsg returnMsgs[] = {
@@ -821,11 +827,11 @@ public class HMaster implements HConstants, HMasterInterface,
       // All's well.  Renew the server's lease.
       // This will always succeed; otherwise, the fetch of serversToServerInfo
       // would have failed above.
-
+      
       serverLeases.renewLease(serverLabel, serverLabel);
 
       // Refresh the info object
-      serversToServerInfo.put(server, serverInfo);
+      serversToServerInfo.put(s, serverInfo);
 
       // Next, process messages for this server
       return processMsgs(serverInfo, msgs);

+ 32 - 11
src/contrib/hbase/src/java/org/apache/hadoop/hbase/Leases.java

@@ -22,7 +22,7 @@ import org.apache.hadoop.io.*;
 import java.io.*;
 import java.util.*;
 
-/*******************************************************************************
+/**
  * Leases
  *
  * There are several server classes in HBase that need to track external clients
@@ -36,9 +36,9 @@ import java.util.*;
  *
  * An instance of the Leases class will create a thread to do its dirty work.  
  * You should close() the instance if you want to clean up the thread properly.
- ******************************************************************************/
+ */
 public class Leases {
-  private static final Log LOG = LogFactory.getLog(Leases.class);
+  static final Log LOG = LogFactory.getLog(Leases.class.getName());
 
   long leasePeriod;
   long leaseCheckFrequency;
@@ -83,22 +83,30 @@ public class Leases {
       LOG.debug("leases closed");
     }
   }
+  
+  String getLeaseName(final Text holderId, final Text resourceId) {
+    return "<holderId=" + holderId + ", resourceId=" + resourceId + ">";
+  }
 
   /** A client obtains a lease... */
-  public void createLease(Text holderId, Text resourceId, LeaseListener listener) throws IOException {
+  public void createLease(Text holderId, Text resourceId,
+      final LeaseListener listener)
+  throws IOException {
     synchronized(leases) {
       synchronized(sortedLeases) {
         Lease lease = new Lease(holderId, resourceId, listener);
         Text leaseId = lease.getLeaseId();
         if(leases.get(leaseId) != null) {
           throw new IOException("Impossible state for createLease(): Lease " +
-            "for holderId " + holderId + " and resourceId " + resourceId +
-            " is still held.");
+            getLeaseName(holderId, resourceId) + " is still held.");
         }
         leases.put(leaseId, lease);
         sortedLeases.add(lease);
       }
     }
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Created lease " + getLeaseName(holderId, resourceId));
+    }
   }
   
   /** A client renews a lease... */
@@ -110,8 +118,8 @@ public class Leases {
         if(lease == null) {
           // It's possible that someone tries to renew the lease, but 
           // it just expired a moment ago.  So fail.
-          throw new IOException("Cannot renew lease; not held (holderId=" +
-            holderId + ", resourceId=" + resourceId + ")");
+          throw new IOException("Cannot renew lease that is not held: " +
+            getLeaseName(holderId, resourceId));
         }
         
         sortedLeases.remove(lease);
@@ -119,9 +127,14 @@ public class Leases {
         sortedLeases.add(lease);
       }
     }
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Renewed lease " + getLeaseName(holderId, resourceId));
+    }
   }
 
-  /** A client explicitly cancels a lease.  The lease-cleanup method is not called. */
+  /** A client explicitly cancels a lease.
+   * The lease-cleanup method is not called.
+   */
   public void cancelLease(Text holderId, Text resourceId) throws IOException {
     synchronized(leases) {
       synchronized(sortedLeases) {
@@ -132,7 +145,8 @@ public class Leases {
           // It's possible that someone tries to renew the lease, but 
           // it just expired a moment ago.  So fail.
           
-          throw new IOException("Cannot cancel lease that is not held (holderId=" + holderId + ", resourceId=" + resourceId + ")");
+          throw new IOException("Cannot cancel lease that is not held: " +
+            getLeaseName(holderId, resourceId));
         }
         
         sortedLeases.remove(lease);
@@ -140,7 +154,10 @@ public class Leases {
 
         lease.cancelled();
       }
-    }        
+    }     
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Cancel lease " + getLeaseName(holderId, resourceId));
+    }
   }
 
   /** LeaseMonitor is a thread that expires Leases that go on too long. */
@@ -211,6 +228,10 @@ public class Leases {
     }
     
     public void expired() {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Lease expired " + getLeaseName(this.holderId,
+          this.resourceId));
+      }
       listener.leaseExpired();
     }