Przeglądaj źródła

HDFS-6229. Merge r1586715 from branch-2.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2.4@1586716 13f79535-47bb-0310-9956-ffa450edef68
Jing Zhao 11 lat temu
rodzic
commit
fac95b2b90

+ 22 - 3
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RetryCache.java

@@ -20,6 +20,7 @@ package org.apache.hadoop.ipc;
 
 import java.util.Arrays;
 import java.util.UUID;
+import java.util.concurrent.locks.ReentrantLock;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -183,6 +184,8 @@ public class RetryCache {
   private final long expirationTime;
   private String cacheName;
 
+  private final ReentrantLock lock = new ReentrantLock();
+
   /**
    * Constructor
    * @param cacheName name to identify the cache by
@@ -206,6 +209,13 @@ public class RetryCache {
         || Arrays.equals(Server.getClientId(), RpcConstants.DUMMY_CLIENT_ID);
   }
 
+  public void lock() {
+    this.lock.lock();
+  }
+
+  public void unlock() {
+    this.lock.unlock();
+  }
 
   private void incrCacheClearedCounter() {
     retryCacheMetrics.incrCacheCleared();
@@ -247,7 +257,8 @@ public class RetryCache {
    */
   private CacheEntry waitForCompletion(CacheEntry newEntry) {
     CacheEntry mapEntry = null;
-    synchronized (this) {
+    lock.lock();
+    try {
       mapEntry = set.get(newEntry);
       // If an entry in the cache does not exist, add a new one
       if (mapEntry == null) {
@@ -262,6 +273,8 @@ public class RetryCache {
       } else {
         retryCacheMetrics.incrCacheHit();
       }
+    } finally {
+      lock.unlock();
     }
     // Entry already exists in cache. Wait for completion and return its state
     Preconditions.checkNotNull(mapEntry,
@@ -292,8 +305,11 @@ public class RetryCache {
   public void addCacheEntry(byte[] clientId, int callId) {
     CacheEntry newEntry = new CacheEntry(clientId, callId, System.nanoTime()
         + expirationTime, true);
-    synchronized(this) {
+    lock.lock();
+    try {
       set.put(newEntry);
+    } finally {
+      lock.unlock();
     }
     retryCacheMetrics.incrCacheUpdated();
   }
@@ -303,8 +319,11 @@ public class RetryCache {
     // since the entry is loaded from editlog, we can assume it succeeded.    
     CacheEntry newEntry = new CacheEntryWithPayload(clientId, callId, payload,
         System.nanoTime() + expirationTime, true);
-    synchronized(this) {
+    lock.lock();
+    try {
       set.put(newEntry);
+    } finally {
+      lock.unlock();
     }
     retryCacheMetrics.incrCacheUpdated();
   }

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -43,6 +43,9 @@ Release 2.4.1 - UNRELEASED
     HDFS-6235. TestFileJournalManager can fail on Windows due to file locking if
     tests run out of order. (cnauroth)
 
+    HDFS-6229. Race condition in failover can cause RetryCache fail to work.
+    (jing9)
+
 Release 2.4.0 - 2014-04-07 
 
   INCOMPATIBLE CHANGES

+ 14 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -791,7 +791,19 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
   public RetryCache getRetryCache() {
     return retryCache;
   }
-  
+
+  void lockRetryCache() {
+    if (retryCache != null) {
+      retryCache.lock();
+    }
+  }
+
+  void unlockRetryCache() {
+    if (retryCache != null) {
+      retryCache.unlock();
+    }
+  }
+
   /** Whether or not retry cache is enabled */
   boolean hasRetryCache() {
     return retryCache != null;
@@ -6917,8 +6929,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     if (cacheEntry != null && cacheEntry.isSuccess()) {
       return (String) cacheEntry.getPayload();
     }
-    writeLock();
     String snapshotPath = null;
+    writeLock();
     try {
       checkOperation(OperationCategory.WRITE);
       checkNameNodeSafeMode("Cannot create snapshot for " + snapshotRoot);

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java

@@ -1569,10 +1569,12 @@ public class NameNode implements NameNodeStatusMXBean {
     @Override
     public void writeLock() {
       namesystem.writeLock();
+      namesystem.lockRetryCache();
     }
     
     @Override
     public void writeUnlock() {
+      namesystem.unlockRetryCache();
       namesystem.writeUnlock();
     }