浏览代码

MAPREDUCE-7353: Mapreduce job fails when NM is stopped. Contributed by Bilwa S T (BilwaST)

(cherry picked from commit 7581413156da396db218e36a966c5749589b31a7)
Eric Payne 3 年之前
父节点
当前提交
e395711164

+ 6 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java

@@ -492,10 +492,15 @@ public abstract class TaskAttemptImpl implements
           TaskAttemptStateInternal.SUCCESS_CONTAINER_CLEANUP,
           TaskAttemptEventType.TA_DIAGNOSTICS_UPDATE,
           DIAGNOSTIC_INFORMATION_UPDATE_TRANSITION)
+      .addTransition(TaskAttemptStateInternal.SUCCESS_CONTAINER_CLEANUP,
+          EnumSet.of(TaskAttemptStateInternal.SUCCEEDED,
+              TaskAttemptStateInternal.KILLED),
+          TaskAttemptEventType.TA_KILL,
+          new KilledAfterSuccessTransition())
       // Ignore-able events
      .addTransition(TaskAttemptStateInternal.SUCCESS_CONTAINER_CLEANUP,
          TaskAttemptStateInternal.SUCCESS_CONTAINER_CLEANUP,
-         EnumSet.of(TaskAttemptEventType.TA_KILL,
+         EnumSet.of(
              TaskAttemptEventType.TA_FAILMSG,
              TaskAttemptEventType.TA_FAILMSG_BY_CLIENT,
              TaskAttemptEventType.TA_TIMED_OUT,

+ 30 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java

@@ -1870,6 +1870,36 @@ public class TestTaskAttempt{
     createReduceTaskAttemptImplForTest(eventHandler, clock, jobConf);
   }
 
+  @Test
+  public void testKillingTaskWhenContainerCleanup() {
+    MockEventHandler eventHandler = new MockEventHandler();
+    TaskAttemptImpl taImpl = createTaskAttemptImpl(eventHandler);
+    TaskId maptaskId = MRBuilderUtils.newTaskId(taImpl.getID().getTaskId()
+        .getJobId(), 1, TaskType.MAP);
+    TaskAttemptId mapTAId =
+        MRBuilderUtils.newTaskAttemptId(maptaskId, 0);
+
+    // move in two steps to the desired state (cannot get there directly)
+    taImpl.handle(new TaskAttemptEvent(taImpl.getID(),
+        TaskAttemptEventType.TA_DONE));
+    assertEquals("Task attempt's internal state is not " +
+        "SUCCESS_FINISHING_CONTAINER",
+        TaskAttemptStateInternal.SUCCESS_FINISHING_CONTAINER,
+        taImpl.getInternalState());
+
+    taImpl.handle(new TaskAttemptEvent(taImpl.getID(),
+        TaskAttemptEventType.TA_TIMED_OUT));
+    assertEquals("Task attempt's internal state is not " +
+        "SUCCESS_CONTAINER_CLEANUP",
+        TaskAttemptStateInternal.SUCCESS_CONTAINER_CLEANUP,
+        taImpl.getInternalState());
+
+    taImpl.handle(new TaskAttemptKillEvent(mapTAId, "", true));
+    assertEquals("Task attempt is not in KILLED state",
+        TaskAttemptState.KILLED,
+        taImpl.getState());
+  }
+
   @Test
   public void testTooManyFetchFailureWhileContainerCleanup() {
     MockEventHandler eventHandler = new MockEventHandler();