Browse Source

HADOOP-568. Fix so that errors while initializing tasks mark the task failed. Contributed by Owen.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@452645 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting 19 years ago
parent
commit
dd20cfa6cf
2 changed files with 25 additions and 4 deletions
  1. 4 0
      CHANGES.txt
  2. 21 4
      src/java/org/apache/hadoop/mapred/TaskTracker.java

+ 4 - 0
CHANGES.txt

@@ -124,6 +124,10 @@ Trunk (unreleased changes)
     version' command is added to show this information, and it is also
     version' command is added to show this information, and it is also
     added to various web interfaces.  (omalley via cutting)
     added to various web interfaces.  (omalley via cutting)
 
 
+30. HADOOP-568.  Fix so that errors while initializing tasks on a
+    tasktracker correctly report the task as failed to the jobtracker,
+    so that it will be rescheduled.  (omalley via cutting)
+
 
 
 Release 0.6.2 - 2006-09-18
 Release 0.6.2 - 2006-09-18
 
 

+ 21 - 4
src/java/org/apache/hadoop/mapred/TaskTracker.java

@@ -674,10 +674,19 @@ public class TaskTracker
           reduceTotal++;
           reduceTotal++;
         }
         }
       }
       }
-      try{
+      try {
     	  localizeJob(tip);
     	  localizeJob(tip);
-      }catch(IOException ie){
-    	  LOG.warn("Error initializing Job " + tip.getTask().getJobId());
+      } catch (IOException ie) {
+        String msg = ("Error initializing " + tip.getTask().getTaskId() + 
+                      ":\n" + StringUtils.stringifyException(ie));
+        LOG.warn(msg);
+        tip.reportDiagnosticInfo(msg);
+        try {
+          tip.killAndCleanup(true);
+        } catch (IOException ie2) {
+          LOG.info("Error cleaning up " + tip.getTask().getTaskId() + ":\n" +
+                   StringUtils.stringifyException(ie2));          
+        }
       }
       }
     }
     }
     
     
@@ -995,7 +1004,8 @@ public class TaskTracker
         }
         }
 
 
         /**
         /**
-         * This task has run on too long, and should be killed.
+         * Something went wrong and the task must be killed.
+         * @param wasFailure was it a failure (versus a kill request)?
          */
          */
         public synchronized void killAndCleanup(boolean wasFailure
         public synchronized void killAndCleanup(boolean wasFailure
                                                 ) throws IOException {
                                                 ) throws IOException {
@@ -1005,6 +1015,13 @@ public class TaskTracker
                   failures += 1;
                   failures += 1;
                 }
                 }
                 runner.kill();
                 runner.kill();
+            } else if (runstate == TaskStatus.State.UNASSIGNED) {
+              if (wasFailure) {
+                failures += 1;
+                runstate = TaskStatus.State.FAILED;
+              } else {
+                runstate = TaskStatus.State.KILLED;
+              }
             }
             }
         }
         }