Browse Source

HADOOP-2031. Correctly maintain the taskid which takes the TIP to completion. Without it, the case of lost tasktrackers isn't handled properly, i.e. the map TIP is incorrectly left marked as 'complete' and is never rescheduled elsewhere, leading to hung reduces. Contributed by Devaraj Das.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@583945 13f79535-47bb-0310-9956-ffa450edef68
Arun Murthy 18 years ago
parent
commit
ba25e38d9e
2 changed files with 29 additions and 6 deletions
  1. 6 0
      CHANGES.txt
  2. 23 6
      src/java/org/apache/hadoop/mapred/TaskInProgress.java

+ 6 - 0
CHANGES.txt

@@ -271,6 +271,12 @@ Trunk (unreleased changes)
     information via taskdetails.jsp. This bug was introduced by HADOOP-1874.
     information via taskdetails.jsp. This bug was introduced by HADOOP-1874.
     (Amar Kamat via acmurthy)
     (Amar Kamat via acmurthy)
                                 
                                 
+    HADOOP-2031.  Correctly maintain the taskid which takes the TIP to 
+    completion, failing which the case of lost tasktrackers isn't handled
+    properly i.e. the map TIP is incorrectly left marked as 'complete' and it
+    is never rescheduled elsewhere, leading to hung reduces.
+    (Devaraj Das via acmurthy)
+                                
   IMPROVEMENTS
   IMPROVEMENTS
 
 
     HADOOP-1908. Restructure data node code so that block sending and 
     HADOOP-1908. Restructure data node code so that block sending and 

+ 23 - 6
src/java/org/apache/hadoop/mapred/TaskInProgress.java

@@ -89,6 +89,9 @@ class TaskInProgress {
   // The 'next' usable taskid of this tip
   // The 'next' usable taskid of this tip
   int nextTaskId = 0;
   int nextTaskId = 0;
     
     
+  // The taskid that took this TIP to SUCCESS
+  private String successfulTaskId;
+  
   // Map from task Id -> TaskTracker Id, contains tasks that are
   // Map from task Id -> TaskTracker Id, contains tasks that are
   // currently runnings
   // currently runnings
   private TreeMap<String, String> activeTasks = new TreeMap<String, String>();
   private TreeMap<String, String> activeTasks = new TreeMap<String, String>();
@@ -243,6 +246,18 @@ class TaskInProgress {
     return !activeTasks.isEmpty();
     return !activeTasks.isEmpty();
   }
   }
     
     
+  private String getSuccessfulTaskid() {
+    return successfulTaskId;
+  }
+  
+  private void setSuccessfulTaskid(String successfulTaskId) {
+    this.successfulTaskId = successfulTaskId; 
+  }
+  
+  private void resetSuccessfulTaskid() {
+    this.successfulTaskId = ""; 
+  }
+  
   /**
   /**
    * Is this tip complete?
    * Is this tip complete?
    * 
    * 
@@ -253,18 +268,14 @@ class TaskInProgress {
   }
   }
 
 
   /**
   /**
-   * Is the given taskid in this tip complete?
+   * Is the given taskid the one that took this tip to completion?
    * 
    * 
    * @param taskid taskid of attempt to check for completion
    * @param taskid taskid of attempt to check for completion
    * @return <code>true</code> if taskid is complete, else <code>false</code>
    * @return <code>true</code> if taskid is complete, else <code>false</code>
    */
    */
   public boolean isComplete(String taskid) {
   public boolean isComplete(String taskid) {
-    TaskStatus status = taskStatuses.get(taskid);
-    if (status == null) {
-      return false;
-    }
     return ((completes > 0) && 
     return ((completes > 0) && 
-            (status.getRunState() == TaskStatus.State.SUCCEEDED));
+             getSuccessfulTaskid().equals(taskid));
   }
   }
 
 
   /**
   /**
@@ -473,6 +484,9 @@ class TaskInProgress {
     if (this.isMapTask() && isComplete(taskid) && 
     if (this.isMapTask() && isComplete(taskid) && 
         jobStatus.getRunState() != JobStatus.SUCCEEDED) {
         jobStatus.getRunState() != JobStatus.SUCCEEDED) {
       this.completes--;
       this.completes--;
+      
+      // Reset the successfulTaskId since we don't have a SUCCESSFUL task now
+      resetSuccessfulTaskid();
     }
     }
 
 
 
 
@@ -529,6 +543,9 @@ class TaskInProgress {
     //
     //
     completedTask(taskid, TaskStatus.State.SUCCEEDED);
     completedTask(taskid, TaskStatus.State.SUCCEEDED);
         
         
+    // Note the successful taskid
+    setSuccessfulTaskid(taskid);
+    
     //
     //
     // Now that the TIP is complete, the other speculative 
     // Now that the TIP is complete, the other speculative 
     // subtasks will be closed when the owning tasktracker 
     // subtasks will be closed when the owning tasktracker