Просмотр исходного кода

HDFS-4186. logSync() is called with the write lock held while releasing lease

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23@1409994 13f79535-47bb-0310-9956-ffa450edef68
Daryn Sharp 12 лет назад
Родитель
Сommit
3ca3a8793f

+ 4 - 1
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -11,7 +11,7 @@ Release 0.23.6 - UNRELEASED
   OPTIMIZATIONS
   OPTIMIZATIONS
 
 
   BUG FIXES
   BUG FIXES
-/
+
 Release 0.23.5 - UNRELEASED
 Release 0.23.5 - UNRELEASED
 
 
   INCOMPATIBLE CHANGES
   INCOMPATIBLE CHANGES
@@ -68,6 +68,9 @@ Release 0.23.5 - UNRELEASED
 
 
     HDFS-4182. SecondaryNameNode leaks NameCache entries (bobby)
     HDFS-4182. SecondaryNameNode leaks NameCache entries (bobby)
 
 
+    HDFS-4186. logSync() is called with the write lock held while releasing
+    lease (Kihwal Lee via daryn)
+
 Release 0.23.4
 Release 0.23.4
 
 
   INCOMPATIBLE CHANGES
   INCOMPATIBLE CHANGES

+ 14 - 10
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -1129,8 +1129,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
           createParent, replication, blockSize);
           createParent, replication, blockSize);
     } finally {
     } finally {
       writeUnlock();
       writeUnlock();
+      // There might be transactions logged while trying to recover the lease.
+      // They need to be sync'ed even when an exception was thrown.
+      getEditLog().logSync();
     }
     }
-    getEditLog().logSync();
     if (auditLog.isInfoEnabled() && isExternalInvocation()) {
     if (auditLog.isInfoEnabled() && isExternalInvocation()) {
       final HdfsFileStatus stat = dir.getFileInfo(src, false);
       final HdfsFileStatus stat = dir.getFileInfo(src, false);
       logAuditEvent(UserGroupInformation.getCurrentUser(),
       logAuditEvent(UserGroupInformation.getCurrentUser(),
@@ -1336,6 +1338,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
       recoverLeaseInternal(inode, src, holder, clientMachine, true);
       recoverLeaseInternal(inode, src, holder, clientMachine, true);
     } finally {
     } finally {
       writeUnlock();
       writeUnlock();
+      // There might be transactions logged while trying to recover the lease.
+      // They need to be sync'ed even when an exception was thrown.
+      getEditLog().logSync();
     }
     }
     return false;
     return false;
   }
   }
@@ -1437,8 +1442,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
                         false, blockManager.maxReplication, (long)0);
                         false, blockManager.maxReplication, (long)0);
     } finally {
     } finally {
       writeUnlock();
       writeUnlock();
+      // There might be transactions logged while trying to recover the lease.
+      // They need to be sync'ed even when an exception was thrown.
+      getEditLog().logSync();
     }
     }
-    getEditLog().logSync();
     if (lb != null) {
     if (lb != null) {
       if (NameNode.stateChangeLog.isDebugEnabled()) {
       if (NameNode.stateChangeLog.isDebugEnabled()) {
         NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: file "
         NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: file "
@@ -2171,7 +2178,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
    *         RecoveryInProgressException if lease recovery is in progress.<br>
    *         RecoveryInProgressException if lease recovery is in progress.<br>
    *         IOException in case of an error.
    *         IOException in case of an error.
    * @return true  if file has been successfully finalized and closed or 
    * @return true  if file has been successfully finalized and closed or 
-   *         false if block recovery has been initiated
+   *         false if block recovery has been initiated. Since the lease owner
+   *         has been changed and logged, caller should call logSync().
    */
    */
   boolean internalReleaseLease(Lease lease, String src, 
   boolean internalReleaseLease(Lease lease, String src, 
       String recoveryLeaseHolder) throws AlreadyBeingCreatedException, 
       String recoveryLeaseHolder) throws AlreadyBeingCreatedException, 
@@ -2303,6 +2311,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     assert hasWriteLock();
     assert hasWriteLock();
     if(newHolder == null)
     if(newHolder == null)
       return lease;
       return lease;
+    // The following transaction is not synced. Make sure it's sync'ed later.
     logReassignLease(lease.getHolder(), src, newHolder);
     logReassignLease(lease.getHolder(), src, newHolder);
     return reassignLeaseInternal(lease, src, newHolder, pendingFile);
     return reassignLeaseInternal(lease, src, newHolder, pendingFile);
   }
   }
@@ -4225,13 +4234,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
   
   
   private void logReassignLease(String leaseHolder, String src,
   private void logReassignLease(String leaseHolder, String src,
       String newHolder) throws IOException {
       String newHolder) throws IOException {
-    writeLock();
-    try {
-      getEditLog().logReassignLease(leaseHolder, src, newHolder);
-    } finally {
-      writeUnlock();
-    }
-    getEditLog().logSync();
+    assert hasWriteLock();
+    getEditLog().logReassignLease(leaseHolder, src, newHolder);
   }
   }
   
   
   /**
   /**

+ 17 - 5
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java

@@ -370,16 +370,20 @@ public class LeaseManager {
     /** Check leases periodically. */
     /** Check leases periodically. */
     public void run() {
     public void run() {
       for(; fsnamesystem.isRunning(); ) {
       for(; fsnamesystem.isRunning(); ) {
+        boolean needSync = false;
         fsnamesystem.writeLock();
         fsnamesystem.writeLock();
         try {
         try {
           if (!fsnamesystem.isInSafeMode()) {
           if (!fsnamesystem.isInSafeMode()) {
-            checkLeases();
+            needSync = checkLeases();
           }
           }
         } finally {
         } finally {
           fsnamesystem.writeUnlock();
           fsnamesystem.writeUnlock();
+          // lease reassignments should to be sync'ed.
+          if (needSync) {
+            fsnamesystem.getEditLog().logSync();
+          }
         }
         }
 
 
-
         try {
         try {
           Thread.sleep(HdfsServerConstants.NAMENODE_LEASE_RECHECK_INTERVAL);
           Thread.sleep(HdfsServerConstants.NAMENODE_LEASE_RECHECK_INTERVAL);
         } catch(InterruptedException ie) {
         } catch(InterruptedException ie) {
@@ -391,13 +395,16 @@ public class LeaseManager {
     }
     }
   }
   }
 
 
-  /** Check the leases beginning from the oldest. */
-  private synchronized void checkLeases() {
+  /** Check the leases beginning from the oldest.
+   *  @return true if sync is needed
+   */
+  private synchronized boolean checkLeases() {
+    boolean needSync = false;
     assert fsnamesystem.hasWriteLock();
     assert fsnamesystem.hasWriteLock();
     for(; sortedLeases.size() > 0; ) {
     for(; sortedLeases.size() > 0; ) {
       final Lease oldest = sortedLeases.first();
       final Lease oldest = sortedLeases.first();
       if (!oldest.expiredHardLimit()) {
       if (!oldest.expiredHardLimit()) {
-        return;
+        return needSync;
       }
       }
 
 
       LOG.info("Lease " + oldest + " has expired hard limit");
       LOG.info("Lease " + oldest + " has expired hard limit");
@@ -420,6 +427,10 @@ public class LeaseManager {
               LOG.debug("Started block recovery " + p + " lease " + oldest);
               LOG.debug("Started block recovery " + p + " lease " + oldest);
             }
             }
           }
           }
+          // If a lease recovery happened, we need to sync later.
+          if (!needSync && !completed) {
+            needSync = true;
+          }
         } catch (IOException e) {
         } catch (IOException e) {
           LOG.error("Cannot release the path "+p+" in the lease "+oldest, e);
           LOG.error("Cannot release the path "+p+" in the lease "+oldest, e);
           removing.add(p);
           removing.add(p);
@@ -430,6 +441,7 @@ public class LeaseManager {
         removeLease(oldest, p);
         removeLease(oldest, p);
       }
       }
     }
     }
+    return needSync;
   }
   }
 
 
   /** {@inheritDoc} */
   /** {@inheritDoc} */

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java

@@ -400,7 +400,7 @@ public class TestEditLog extends TestCase {
 
 
       // Now ask to sync edit from B, which should sync both edits.
       // Now ask to sync edit from B, which should sync both edits.
       doCallLogSync(threadB, editLog);
       doCallLogSync(threadB, editLog);
-      assertEquals("logSync from second thread should bump txid up to 2",
+      assertEquals("logSync from second thread should bump txid up to 3",
         3, editLog.getSyncTxId());
         3, editLog.getSyncTxId());
 
 
       // Now ask to sync edit from A, which was already batched in - thus
       // Now ask to sync edit from A, which was already batched in - thus