Browse Source

HADOOP-563. Improve NameNode lease policy. Contributed by Dhruba.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@468107 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting 18 years ago
parent
commit
e56cc797c4

+ 8 - 0
CHANGES.txt

@@ -50,6 +50,14 @@ Trunk (unreleased changes)
 14. HADOOP-627.  Fix some synchronization problems in MiniMRCluster
     that sometimes caused unit tests to fail.  (Nigel Daley via cutting)
 
+15. HADOOP-563.  Improve the NameNode's lease policy so that leases
+    are held for one hour without renewal (instead of one minute).
+    However, another attempt to create the same file will still succeed
+    if the lease has not been renewed within a minute.  This prevents
+    communication or scheduling problems from causing a write to fail
+    for up to an hour, barring some other process trying to create the
+    same file.  (Dhruba Borthakur via cutting)
+
 
 Release 0.7.2 - 2006-10-18
 

+ 2 - 2
src/java/org/apache/hadoop/dfs/DFSClient.java

@@ -445,7 +445,7 @@ class DFSClient implements FSConstants {
         public void run() {
             long lastRenewed = 0;
             while (running) {
-                if (System.currentTimeMillis() - lastRenewed > (LEASE_PERIOD / 2)) {
+                if (System.currentTimeMillis() - lastRenewed > (LEASE_SOFTLIMIT_PERIOD / 2)) {
                     try {
                       if( pendingCreates.size() > 0 )
                         namenode.renewLease(clientName);
@@ -1004,7 +1004,7 @@ class DFSClient implements FSConstants {
                   // wait and try again.
                   LOG.info(StringUtils.stringifyException(e));
                   try {
-                    Thread.sleep(LEASE_PERIOD);
+                    Thread.sleep(LEASE_SOFTLIMIT_PERIOD);
                   } catch (InterruptedException ie) {
                   }
                 }

+ 2 - 1
src/java/org/apache/hadoop/dfs/FSConstants.java

@@ -103,7 +103,8 @@ public interface FSConstants {
     public static long HEARTBEAT_INTERVAL = 3;
     public static long EXPIRE_INTERVAL = 10 * 60 * 1000;
     public static long BLOCKREPORT_INTERVAL = 60 * 60 * 1000;
-    public static long LEASE_PERIOD = 60 * 1000;
+    public static final long LEASE_SOFTLIMIT_PERIOD = 60 * 1000;
+    public static final long LEASE_HARDLIMIT_PERIOD = 60 * LEASE_SOFTLIMIT_PERIOD;
     public static int READ_TIMEOUT = 60 * 1000;
 
     // We need to limit the length and depth of a path in the filesystem.  HADOOP-438

+ 64 - 10
src/java/org/apache/hadoop/dfs/FSNamesystem.java

@@ -414,11 +414,54 @@ class FSNamesystem implements FSConstants {
         throw new IOException("Invalid file name: " + src);      	  
       }
       try {
-        if (pendingCreates.get(src) != null) {
-           throw new AlreadyBeingCreatedException(
-                   "failed to create file " + src + " for " + holder +
-                   " on client " + clientMachine + 
-                   " because pendingCreates is non-null.");
+        FileUnderConstruction pendingFile = (FileUnderConstruction ) 
+                                              pendingCreates.get(src);
+        if (pendingFile != null) {
+          //
+          // If the file exists in pendingCreates, then it must be in our
+          // leases. Find the appropriate lease record.
+          //
+          Lease lease = (Lease) leases.get(holder);
+          //
+          // The lookup above is by holder name, not by file, so a non-null
+          // result means the requester already holds a lease. Treat that as
+          // the holder recreating this file, which should never occur.
+          if (lease != null) {
+            throw new AlreadyBeingCreatedException(
+                  "failed to create file " + src + " for " + holder +
+                  " on client " + clientMachine + 
+                  " because current leaseholder is trying to recreate file.");
+          }
+          //
+          // Find the original holder.
+          //
+          UTF8 oldholder = pendingFile.getClientName();
+          lease = (Lease) leases.get(oldholder);
+          if (lease == null) {
+            throw new AlreadyBeingCreatedException(
+                  "failed to create file " + src + " for " + holder +
+                  " on client " + clientMachine + 
+                  " because pendingCreates is non-null but no leases found.");
+          }
+          //
+          // If the original holder has not renewed in the last SOFTLIMIT 
+          // period, then reclaim all resources and allow this request 
+          // to proceed. Otherwise, prevent this request from creating file.
+          //
+          if (lease.expiredSoftLimit()) {
+            lease.releaseLocks();
+            leases.remove(lease.holder);
+            LOG.info("Removing lease " + lease + " ");
+            if (!sortedLeases.remove(lease)) {
+              LOG.error("Unknown failure trying to remove " + lease + 
+                       " from lease set.");
+            }
+          } else  {
+            throw new AlreadyBeingCreatedException(
+                  "failed to create file " + src + " for " + holder +
+                  " on client " + clientMachine + 
+                  " because pendingCreates is non-null.");
+          }
         }
 
         try {
@@ -929,12 +972,23 @@ class FSNamesystem implements FSConstants {
         public void renew() {
             this.lastUpdate = now();
         }
-        public boolean expired() {
-            if (now() - lastUpdate > LEASE_PERIOD) {
+        /**
+         * Returns true if the Hard Limit Timer has expired
+         */
+        public boolean expiredHardLimit() {
+            if (now() - lastUpdate > LEASE_HARDLIMIT_PERIOD) {
+                return true;
+            }
+            return false;
+        }
+        /**
+         * Returns true if the Soft Limit Timer has expired
+         */
+        public boolean expiredSoftLimit() {
+            if (now() - lastUpdate > LEASE_SOFTLIMIT_PERIOD) {
                 return true;
-            } else {
-                return false;
             }
+            return false;
         }
         public void obtained(UTF8 src) {
             locks.add(src);
@@ -999,7 +1053,7 @@ class FSNamesystem implements FSConstants {
                         Lease top;
                         while ((sortedLeases.size() > 0) &&
                                ((top = (Lease) sortedLeases.first()) != null)) {
-                            if (top.expired()) {
+                            if (top.expiredHardLimit()) {
                                 top.releaseLocks();
                                 leases.remove(top.holder);
                                 LOG.info("Removing lease " + top + ", leases remaining: " + sortedLeases.size());