소스 검색

MAPREDUCE-5746. Job diagnostics can implicate wrong task for a failed job. Contributed by Jason Lowe

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23@1567676 13f79535-47bb-0310-9956-ffa450edef68
Jason Darrell Lowe 11 년 전
부모
커밋
b0bd3410ce

+ 3 - 0
hadoop-mapreduce-project/CHANGES.txt

@@ -18,6 +18,9 @@ Release 0.23.11 - UNRELEASED
     MAPREDUCE-5454. TestDFSIO fails intermittently on JDK7 (Karthik Kambatla
     via jlowe)
 
+    MAPREDUCE-5746. Job diagnostics can implicate wrong task for a failed job
+    (jlowe)
+
 Release 0.23.10 - 2013-12-09
 
   INCOMPATIBLE CHANGES

+ 4 - 2
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryParser.java

@@ -344,8 +344,10 @@ public class JobHistoryParser implements HistoryEventHandler {
     taskInfo.finishTime = event.getFinishTime();
     taskInfo.error = StringInterner.weakIntern(event.getError());
     taskInfo.failedDueToAttemptId = event.getFailedAttemptID();
-    info.errorInfo = "Task " + taskInfo.taskId +" failed " +
-    taskInfo.attemptsMap.size() + " times ";
+    if (info.errorInfo.isEmpty()) {
+      info.errorInfo = "Task " + taskInfo.taskId + " failed " +
+          taskInfo.attemptsMap.size() + " times ";
+    }
   }
 
   private void handleTaskStartedEvent(TaskStartedEvent event) {

+ 39 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java

@@ -41,6 +41,7 @@ import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.JobID;
 import org.apache.hadoop.mapreduce.MRJobConfig;
 import org.apache.hadoop.mapreduce.TaskID;
 import org.apache.hadoop.mapreduce.TypeConverter;
@@ -52,7 +53,9 @@ import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.AMInfo;
 import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo;
 import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo;
 import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo;
+import org.apache.hadoop.mapreduce.jobhistory.TaskFailedEvent;
 import org.apache.hadoop.mapreduce.jobhistory.TaskFinishedEvent;
+import org.apache.hadoop.mapreduce.jobhistory.TaskStartedEvent;
 import org.apache.hadoop.mapreduce.v2.api.records.JobId;
 import org.apache.hadoop.mapreduce.v2.api.records.JobState;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
@@ -715,4 +718,40 @@ public class TestJobHistoryParsing {
     assertNull(test.getAMInfos());
 
   }
+
+  @Test
+  public void testMultipleFailedTasks() throws Exception {
+    JobHistoryParser parser =
+        new JobHistoryParser(Mockito.mock(FSDataInputStream.class));
+    EventReader reader = Mockito.mock(EventReader.class);
+    final AtomicInteger numEventsRead = new AtomicInteger(0); // Hack!
+    final org.apache.hadoop.mapreduce.TaskType taskType =
+        org.apache.hadoop.mapreduce.TaskType.MAP;
+    final TaskID[] tids = new TaskID[2];
+    JobID jid = new JobID("1", 1);
+    tids[0] = new TaskID(jid, taskType, 0);
+    tids[1] = new TaskID(jid, taskType, 1);
+    Mockito.when(reader.getNextEvent()).thenAnswer(
+        new Answer<HistoryEvent>() {
+          public HistoryEvent answer(InvocationOnMock invocation)
+              throws IOException {
+            // send two task start and two task fail events for tasks 0 and 1
+            int eventId = numEventsRead.getAndIncrement();
+            TaskID tid = tids[eventId & 0x1];
+            if (eventId < 2) {
+              return new TaskStartedEvent(tid, 0, taskType, "");
+            }
+            if (eventId < 4) {
+              TaskFailedEvent tfe = new TaskFailedEvent(tid, 0, taskType,
+                  "failed", "FAILED", null);
+              tfe.setDatum(tfe.getDatum());
+              return tfe;
+            }
+            return null;
+          }
+        });
+    JobInfo info = parser.parse(reader);
+    assertTrue("Task 0 not implicated",
+        info.getErrorInfo().contains(tids[0].toString()));
+  }
 }