Browse Source

HADOOP-791. Fix a deadlock in the task tracker. Contributed by Mahadev.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@487691 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting 18 years ago
parent
commit
6e51b11b2c
2 changed files with 30 additions and 19 deletions
  1. 3 0
      CHANGES.txt
  2. 27 19
      src/java/org/apache/hadoop/mapred/TaskTracker.java

+ 3 - 0
CHANGES.txt

@@ -110,6 +110,9 @@ Release 0.9.2 - 2006-12-15
  2. HADOOP-827. Turn off speculative execution by default, since it's
     currently broken.  (omalley via cutting)
 
+ 3. HADOOP-791. Fix a deadlock in the task tracker.
+    (Mahadev Konar via cutting)
+
 
 Release 0.9.1 - 2006-12-06
 

+ 27 - 19
src/java/org/apache/hadoop/mapred/TaskTracker.java

@@ -1130,28 +1130,33 @@ public class TaskTracker
          * We no longer need anything from this task, as the job has
          * finished.  If the task is still running, kill it (and clean up
          */
-        public synchronized void jobHasFinished() throws IOException {
-        	 
-            if (getRunState() == TaskStatus.State.RUNNING) {
+        public void jobHasFinished() throws IOException {
+          boolean killTask = false;  
+          synchronized(this){
+              killTask = (getRunState() == TaskStatus.State.RUNNING);
+              if (killTask) {
                 killAndCleanup(false);
-            } else {
-                cleanup();
-            }
-            if (keepJobFiles)
-              return;
-            
-            // Delete temp directory in case any task used PhasedFileSystem.
-            try{
-              String systemDir = task.getConf().get("mapred.system.dir");
-              Path taskTempDir = new Path(systemDir + "/" + 
-                  task.getJobId() + "/" + task.getTipId());
-              if( fs.exists(taskTempDir)){
-                fs.delete(taskTempDir) ;
               }
-            }catch(IOException e){
-              LOG.warn("Error in deleting reduce temporary output",e); 
+          }
+          if (!killTask) {
+            cleanup();
+          }
+          if (keepJobFiles)
+            return;
+              
+          synchronized(this){
+              // Delete temp directory in case any task used PhasedFileSystem.
+              try{
+                String systemDir = task.getConf().get("mapred.system.dir");
+                Path taskTempDir = new Path(systemDir + "/" + 
+                    task.getJobId() + "/" + task.getTipId() + "/" + task.getTaskId());
+                if( fs.exists(taskTempDir)){
+                  fs.delete(taskTempDir) ;
+                }
+              }catch(IOException e){
+                LOG.warn("Error in deleting reduce temporary output",e); 
+              }
             }
-            
             // Delete the job directory for this  
             // task if the job is done/failed
             if (purgeJobFiles) {
@@ -1205,6 +1210,9 @@ public class TaskTracker
          * We no longer need anything from this task.  Either the 
          * controlling job is all done and the files have been copied
          * away, or the task failed and we don't need the remains.
+         * Any calls to cleanup should not lock the tip first.
+         * cleanup does the right thing- updates tasks in Tasktracker
+         * by locking tasktracker first and then locks the tip.
          */
         void cleanup() throws IOException {
             String taskId = task.getTaskId();