Browse Source

HADOOP-4795. Prevent lease monitor getting into an infinite loop when leases and the namespace tree does not match. (szetszwo)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/branches/branch-0.19@725519 13f79535-47bb-0310-9956-ffa450edef68
Tsz-wo Sze 16 years ago
parent
commit
a81da5f386

+ 3 - 0
CHANGES.txt

@@ -1081,6 +1081,9 @@ Release 0.18.3 - Unreleased
     HADOOP-4806. HDFS rename should not use src path as a regular expression.
     (szetszwo)
 
+    HADOOP-4795. Prevent lease monitor getting into an infinite loop when
+    leases and the namespace tree does not match. (szetszwo)
+
 Release 0.18.2 - 2008-11-03
 
   BUG FIXES

+ 11 - 13
src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -318,7 +318,7 @@ public class FSNamesystem implements FSConstants, FSNamesystemMBean {
                             conf.getInt("dfs.replication.pending.timeout.sec", 
                                         -1) * 1000L);
     this.hbthread = new Daemon(new HeartbeatMonitor());
-    this.lmthread = new Daemon(leaseManager.createMonitor());
+    this.lmthread = new Daemon(leaseManager.new Monitor());
     this.replthread = new Daemon(new ReplicationMonitor());
     hbthread.start();
     lmthread.start();
@@ -1827,20 +1827,18 @@ public class FSNamesystem implements FSConstants, FSNamesystemMBean {
 
     INodeFile iFile = dir.getFileINode(src);
     if (iFile == null) {
-      NameNode.stateChangeLog.warn("DIR* NameSystem.internalReleaseCreate: "
-                                   + "attempt to release a create lock on "
-                                   + src + " file does not exist.");
-      return;
+      final String message = "DIR* NameSystem.internalReleaseCreate: "
+        + "attempt to release a create lock on "
+        + src + " file does not exist.";
+      NameNode.stateChangeLog.warn(message);
+      throw new IOException(message);
     }
     if (!iFile.isUnderConstruction()) {
-      NameNode.stateChangeLog.warn("DIR* NameSystem.internalReleaseCreate: "
-                                   + "attempt to release a create lock on "
-                                   + src + " but file is already closed.");
-      return;
-    }
-    if (NameNode.stateChangeLog.isDebugEnabled()) {
-      NameNode.stateChangeLog.debug("DIR* NameSystem.internalReleaseCreate: "
-          + src + " does not being written in " + lease);
+      final String message = "DIR* NameSystem.internalReleaseCreate: "
+        + "attempt to release a create lock on "
+        + src + " but file is already closed.";
+      NameNode.stateChangeLog.warn(message);
+      throw new IOException(message);
     }
 
     INodeFileUnderConstruction pendingFile = (INodeFileUnderConstruction) iFile;

+ 43 - 27
src/hdfs/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java

@@ -335,43 +335,59 @@ public class LeaseManager {
     this.hardLimit = hardLimit; 
   }
   
-  Monitor createMonitor() {return new Monitor();}
-
   /******************************************************
    * Monitor checks for leases that have expired,
    * and disposes of them.
    ******************************************************/
   class Monitor implements Runnable {
+    final String name = getClass().getSimpleName();
+
+    /** Check leases periodically. */
     public void run() {
-      try {
-        while (fsnamesystem.isRunning()) {
-          synchronized (fsnamesystem) {
-            synchronized (LeaseManager.this) {
-              Lease top;
-              while ((sortedLeases.size() > 0) &&
-                     ((top = sortedLeases.first()) != null)) {
-                if (top.expiredHardLimit()) {
-                  LOG.info("Lease Monitor: Removing lease " + top
-                      + ", sortedLeases.size()=: " + sortedLeases.size());
-                  for(StringBytesWritable s : top.paths) {
-                    fsnamesystem.internalReleaseLease(top, s.getString());
-                  }
-                } else {
-                  break;
-                }
-              }
-            }
+      for(; fsnamesystem.isRunning(); ) {
+        synchronized(fsnamesystem) {
+          checkLeases();
+        }
+
+        try {
+          Thread.sleep(2000);
+        } catch(InterruptedException ie) {
+          if (LOG.isDebugEnabled()) {
+            LOG.debug(name + " is interrupted", ie);
           }
+        }
+      }
+    }
+
+    /** Check the leases beginning from the oldest. */
+    private synchronized void checkLeases() {
+      for(; sortedLeases.size() > 0; ) {
+        final Lease oldest = sortedLeases.first();
+        if (!oldest.expiredHardLimit()) {
+          return;
+        }
+
+        LOG.info(name + ": Lease " + oldest + " has expired hard limit");
+
+        final List<StringBytesWritable> removing = new ArrayList<StringBytesWritable>();
+        for(StringBytesWritable p : oldest.getPaths()) {
+          try {
+            fsnamesystem.internalReleaseLease(oldest, p.getString());
+          } catch (IOException e) {
+            LOG.error("In " + name + ", cannot release the path " + p
+                + " in the lease " + oldest, e);
+            removing.add(p);
+          }
+        }
+
+        for(StringBytesWritable p : removing) {
           try {
-            Thread.sleep(2000);
-          } catch(InterruptedException ie) {
-            if (LOG.isDebugEnabled()) {
-              LOG.debug(getClass().getName() + " is interrupted", ie);
-            }
+            removeLease(oldest, p.getString());
+          } catch (IOException e) {
+            LOG.error("In " + name + ", cannot removeLease: oldest="
+                + oldest + ", p=" + p, e);
           }
         }
-      } catch(Exception e) {
-        LOG.error("In " + getClass().getName(), e);
       }
     }
   }