浏览代码

HADOOP-5233. Addresses the three issues - Race condition in updating status, NPE in TaskTracker task localization when the conf file is missing (HADOOP-5234) and NPE in handling KillTaskAction of a cleanup task (HADOOP-5235). Contributed by Amareshwari Sriramadasu.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/trunk@746227 13f79535-47bb-0310-9956-ffa450edef68
Devaraj Das 16 年之前
父节点
当前提交
a468abc52d

+ 5 - 0
CHANGES.txt

@@ -808,6 +808,11 @@ Release 0.20.0 - Unreleased
     Scheduler accesses the tasktrackers stored by the JobTracker.
     (Rahul Kumar Singh via yhemanth)
 
+    HADOOP-5233. Addresses the three issues - Race condition in updating
+    status, NPE in TaskTracker task localization when the conf file is missing
+    (HADOOP-5234) and NPE in handling KillTaskAction of a cleanup task (HADOOP-5235).
+    (Amareshwari Sriramadasu via ddas)
+
 Release 0.19.1 - Unreleased
 
   IMPROVEMENTS

+ 2 - 0
src/mapred/org/apache/hadoop/mapred/JobInProgress.java

@@ -815,6 +815,8 @@ class JobInProgress {
         } else {
           reduceCleanupTasks.add(taskid);
         }
+        // Remove the task entry from jobtracker
+        jobtracker.removeTaskEntry(taskid);
       }
       //For a failed task update the JT datastructures. 
       else if (state == TaskStatus.State.FAILED ||

+ 2 - 4
src/mapred/org/apache/hadoop/mapred/Task.java

@@ -514,8 +514,7 @@ abstract class Task implements Writable, Configurable {
           if (sendProgress) {
             // we need to send progress update
             updateCounters();
-            taskStatus.statusUpdate(getState(),
-                                    taskProgress.get(),
+            taskStatus.statusUpdate(taskProgress.get(),
                                     taskProgress.toString(), 
                                     counters);
             taskFound = umbilical.statusUpdate(taskId, taskStatus);
@@ -702,8 +701,7 @@ abstract class Task implements Writable, Configurable {
   private void sendLastUpdate(TaskUmbilicalProtocol umbilical) 
   throws IOException {
     // send a final status report
-    taskStatus.statusUpdate(getState(),
-                            taskProgress.get(),
+    taskStatus.statusUpdate(taskProgress.get(),
                             taskProgress.toString(), 
                             counters);
     statusUpdate(umbilical);

+ 0 - 1
src/mapred/org/apache/hadoop/mapred/TaskInProgress.java

@@ -915,7 +915,6 @@ class TaskInProgress {
       t.setTaskCleanupTask();
       t.setState(taskStatuses.get(taskid).getRunState());
       cleanupTasks.put(taskid, taskTracker);
-      jobtracker.removeTaskEntry(taskid);
     }
     t.setConf(conf);
     LOG.debug("Launching task with skipRanges:"+failedRanges.getSkipRanges());

+ 4 - 5
src/mapred/org/apache/hadoop/mapred/TaskStatus.java

@@ -54,7 +54,7 @@ abstract class TaskStatus implements Writable, Cloneable {
   private long finishTime; 
   private long outputSize;
     
-  private Phase phase = Phase.STARTING; 
+  private volatile Phase phase = Phase.STARTING; 
   private Counters counters;
   private boolean includeCounters;
   private SortedRanges.Range nextRecordRange = new SortedRanges.Range();
@@ -267,16 +267,15 @@ abstract class TaskStatus implements Writable, Cloneable {
   /**
    * Update the status of the task.
    * 
-   * @param runstate
+   * This update is done by the ping thread before sending the status.
+   * 
    * @param progress
    * @param state
    * @param counters
    */
-  synchronized void statusUpdate(State runState, 
-                                 float progress,
+  synchronized void statusUpdate(float progress,
                                  String state, 
                                  Counters counters) {
-    setRunState(runState);
     setProgress(progress);
     setStateString(state);
     setCounters(counters);

+ 19 - 4
src/mapred/org/apache/hadoop/mapred/TaskTracker.java

@@ -2050,10 +2050,16 @@ public class TaskTracker
       if (this.done || 
           (this.taskStatus.getRunState() != TaskStatus.State.RUNNING &&
           this.taskStatus.getRunState() != TaskStatus.State.COMMIT_PENDING &&
-          !isCleaningup())) {
+          !isCleaningup()) ||
+          ((this.taskStatus.getRunState() == TaskStatus.State.COMMIT_PENDING ||
+           this.taskStatus.getRunState() == TaskStatus.State.FAILED_UNCLEAN ||
+           this.taskStatus.getRunState() == TaskStatus.State.KILLED_UNCLEAN) &&
+           taskStatus.getRunState() == TaskStatus.State.RUNNING)) {
         //make sure we ignore progress messages after a task has 
         //invoked TaskUmbilicalProtocol.done() or if the task has been
-        //KILLED/FAILED
+        //KILLED/FAILED/FAILED_UNCLEAN/KILLED_UNCLEAN
+        //Also ignore progress update if the state change is from 
+        //COMMIT_PENDING/FAILED_UNCLEAN/KILLED_UNCLEAN to RUNNING
         LOG.info(task.getTaskID() + " Ignoring status-update since " +
                  ((this.done) ? "task is 'done'" : 
                                 ("runState: " + this.taskStatus.getRunState()))
@@ -2407,7 +2413,10 @@ public class TaskTracker
         if (wasFailure) {
           failures += 1;
         }
-        runner.kill();
+        // runner could be null if task-cleanup attempt is not localized yet
+        if (runner != null) {
+          runner.kill();
+        }
         setTaskFailState(wasFailure);
       } else if (taskStatus.getRunState() == TaskStatus.State.UNASSIGNED) {
         if (wasFailure) {
@@ -2486,6 +2495,11 @@ public class TaskTracker
       }
       synchronized (this) {
         try {
+          // localJobConf could be null if localization has not happened
+          // then no cleanup will be required.
+          if (localJobConf == null) {
+            return;
+          }
           String taskDir = getLocalTaskDir(task.getJobID().toString(),
                              taskId.toString(), task.isTaskCleanupTask());
           if (needCleanup) {
@@ -2622,7 +2636,8 @@ public class TaskTracker
   public synchronized void commitPending(TaskAttemptID taskid,
                                          TaskStatus taskStatus) 
   throws IOException {
-    LOG.info("Task " + taskid + " is in COMMIT_PENDING");
+    LOG.info("Task " + taskid + " is in commit-pending," +"" +
+             " task state:" +taskStatus.getRunState());
     statusUpdate(taskid, taskStatus);
     reportTaskFinished(taskid, true);
   }