Browse Source

HADOOP-1234. Fix a race condition in the file cache that caused tasktracker to not be able to find cached files. Contributed by Arun.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@547793 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting 18 years ago
parent
commit
15db256066
2 changed files with 20 additions and 14 deletions
  1. 4 0
      CHANGES.txt
  2. 16 14
      src/java/org/apache/hadoop/filecache/DistributedCache.java

+ 4 - 0
CHANGES.txt

@@ -133,6 +133,10 @@ Trunk (unreleased changes)
  42. HADOOP-1472.  Fix so that timed-out tasks are counted as failures
      rather than as killed.  (Arun C Murthy via cutting)
 
+ 43. HADOOP-1234.  Fix a race condition in file cache that caused
+     tasktracker to not be able to find cached files.
+     (Arun C Murthy via cutting)
+
 
 Release 0.13.0 - 2007-06-08
 

+ 16 - 14
src/java/org/apache/hadoop/filecache/DistributedCache.java

@@ -68,23 +68,23 @@ public class DistributedCache {
     CacheStatus lcacheStatus;
     Path localizedPath;
     synchronized (cachedArchives) {
-      if (!cachedArchives.containsKey(cacheId)) {
+      lcacheStatus = cachedArchives.get(cacheId);
+      if (lcacheStatus == null) {
         // was never localized
         lcacheStatus = new CacheStatus();
         lcacheStatus.currentStatus = false;
-        lcacheStatus.refcount = 1;
+        lcacheStatus.refcount = 0;
         lcacheStatus.localLoadPath = new Path(baseDir, new Path(cacheId));
         cachedArchives.put(cacheId, lcacheStatus);
-      } else {
-        lcacheStatus = (CacheStatus) cachedArchives.get(cacheId);
-        synchronized (lcacheStatus) {
-          lcacheStatus.refcount++;
-        }
+      }
+      
+      synchronized (lcacheStatus) {
+        localizedPath = localizeCache(cache, lcacheStatus, conf, isArchive, 
+                                      md5, currentWorkDir);
+        lcacheStatus.refcount++;
       }
     }
-    synchronized (lcacheStatus) {
-      localizedPath = localizeCache(cache, lcacheStatus, conf, isArchive, md5, currentWorkDir);
-    }
+
     // try deleting stuff if you can
     long size = FileUtil.getDU(new File(baseDir.toString()));
     // setting the cache size to a default of 1MB
@@ -125,10 +125,12 @@ public class DistributedCache {
       for (Iterator it = cachedArchives.keySet().iterator(); it.hasNext();) {
         String cacheId = (String) it.next();
         CacheStatus lcacheStatus = (CacheStatus) cachedArchives.get(cacheId);
-        if (lcacheStatus.refcount == 0) {
-          // delete this cache entry
-          FileSystem.getLocal(conf).delete(lcacheStatus.localLoadPath);
-          it.remove();
+        synchronized (lcacheStatus) {
+          if (lcacheStatus.refcount == 0) {
+            // delete this cache entry
+            FileSystem.getLocal(conf).delete(lcacheStatus.localLoadPath);
+            it.remove();
+          }
         }
       }
     }